On Thu, Apr 18, 2024 at 08:24:03PM +0000, Devulapalli, Raghuveer wrote: >> This seems to contradict the note about doing step 3 at any point, and >> given step 1 is the OSXSAVE check, I'm not following what this means, >> anyway. > > It is recommended that you run the xgetbv code before you check for cpu > features avx512-popcnt and avx512-bw. The way it is written now is the > opposite order. I would also recommend splitting the cpuid feature check > for avx512popcnt/avx512bw and xgetbv section into separate functions to > make them modular. Something like: > > static inline > int check_os_avx512_support(void) > { > // (1) run cpuid leaf 1 to check for xgetbv instruction support: > unsigned int exx[4] = {0, 0, 0, 0}; > __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); > if ((exx[2] & (1 << 27)) == 0) /* xsave */ > return false; > > /* Does XGETBV say the ZMM/YMM/XMM registers are enabled? */ > return (_xgetbv(0) & 0xe0) == 0xe0; > } > >> I'm also wondering if we need to check that (_xgetbv(0) & 0xe6) == 0xe6 >> instead of just (_xgetbv(0) & 0xe0) != 0, as the status of the lower >> half of some of the ZMM registers is stored in the SSE and AVX state >> [0]. I don't know how likely it is that 0xe0 would succeed but 0xe6 >> wouldn't, but we might as well make it correct. > > This is correct. It needs to check all the 3 bits (XMM/YMM and ZMM). The > way it is written is now is in-correct.
Thanks for the feedback. I've attached an updated patch. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From d20b19804a17d9f6eab1d40de7e9fb10488ac6b0 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nat...@postgresql.org> Date: Thu, 18 Apr 2024 15:57:56 -0500 Subject: [PATCH v2 1/1] osxsave --- src/port/pg_popcount_avx512_choose.c | 89 +++++++++++++++++++--------- 1 file changed, 60 insertions(+), 29 deletions(-) diff --git a/src/port/pg_popcount_avx512_choose.c b/src/port/pg_popcount_avx512_choose.c index ae3fa3d306..009f94909a 100644 --- a/src/port/pg_popcount_avx512_choose.c +++ b/src/port/pg_popcount_avx512_choose.c @@ -34,27 +34,47 @@ #ifdef TRY_POPCNT_FAST /* - * Returns true if the CPU supports the instructions required for the AVX-512 - * pg_popcount() implementation. + * Does CPUID say there's support for XSAVE instructions? */ -bool -pg_popcount_avx512_available(void) +static inline bool +xsave_available(void) { unsigned int exx[4] = {0, 0, 0, 0}; - /* Does CPUID say there's support for AVX-512 popcount instructions? */ -#if defined(HAVE__GET_CPUID_COUNT) - __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUIDEX) - __cpuidex(exx, 7, 0); +#if defined(HAVE__GET_CPUID) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUID) + __cpuid(exx, 1); #else #error cpuid instruction not available #endif - if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */ - return false; + return (exx[2] & (1 << 27)) != 0; /* osxsave */ +} + +/* + * Does XGETBV say the ZMM registers are enabled? + * + * NB: Caller is responsible for verifying that xsave_available() returns true + * before calling this. + */ +static inline bool +zmm_regs_available(void) +{ +#ifdef HAVE_XSAVE_INTRINSICS + return (_xgetbv(0) & 0xe6) != 0xe6; +#else + return false; +#endif +} + +/* + * Does CPUID say there's support for AVX-512 popcount instructions? + */ +static inline bool +avx512_popcnt_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; - /* Does CPUID say there's support for AVX-512 byte and word instructions? */ - memset(exx, 0, sizeof(exx)); #if defined(HAVE__GET_CPUID_COUNT) __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); #elif defined(HAVE__CPUIDEX) @@ -62,27 +82,38 @@ pg_popcount_avx512_available(void) #else #error cpuid instruction not available #endif - if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */ - return false; + return (exx[2] & (1 << 14)) != 0; /* avx512-vpopcntdq */ +} - /* Does CPUID say there's support for XSAVE instructions? */ - memset(exx, 0, sizeof(exx)); -#if defined(HAVE__GET_CPUID) - __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); -#elif defined(HAVE__CPUID) - __cpuid(exx, 1); +/* + * Does CPUID say there's support for AVX-512 byte and word instructions? + */ +static inline bool +avx512_bw_available(void) +{ + unsigned int exx[4] = {0, 0, 0, 0}; + +#if defined(HAVE__GET_CPUID_COUNT) + __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(HAVE__CPUIDEX) + __cpuidex(exx, 7, 0); #else #error cpuid instruction not available #endif - if ((exx[2] & (1 << 26)) == 0) /* xsave */ - return false; + return (exx[1] & (1 << 30)) != 0; /* avx512-bw */ +} - /* Does XGETBV say the ZMM registers are enabled? */ -#ifdef HAVE_XSAVE_INTRINSICS - return (_xgetbv(0) & 0xe0) != 0; -#else - return false; -#endif +/* + * Returns true if the CPU supports the instructions required for the AVX-512 + * pg_popcount() implementation. + */ +bool +pg_popcount_avx512_available(void) +{ + return xsave_available() && + zmm_regs_available() && + avx512_popcnt_available() && + avx512_bw_available(); } #endif /* TRY_POPCNT_FAST */ -- 2.25.1