Thanks for the bug report. I installed the attached patch into coreutils
on Savannah. It builds on your idea with several other changes:
* There's a similar issue with cksum.c and pclmul.
* configure.ac can be simplified, since it seems there's no point
compiling these instructions if __builtin_cpu_supports doesn't work.
* This lets us simplify the source code a bit more.
Please let me know if the attached patch works for you.
PS. Does the attached cksum.c / pclmul change fix any user-visible
misbehavior? If so, what should we put into the NEWS file?

From 91a74d361461494dd546467e83bc36c24185d6e7 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Tue, 13 Jun 2023 21:10:24 -0700
Subject: [PATCH] wc: port to kernels that disable XSAVE YMM
Problem reported by Dave Hansen <https://bugs.gnu.org/64058>.
Apply similar change to cksum and pclmul, too.
* NEWS: Mention wc fix.
* configure.ac (cpuid_exists, get_cpuid_count_exists):
Remove. All uses removed, since we no longer use __get_cpuid or
__get_cpuid_count.
(pclmul_intrinsic_exists, avx2_intrinsic_exists): Set to no if
__builtin_cpu_supports calls cannot be compiled.
(HAVE_PCLMUL_INTRINSIC, HAVE_AVX2_INTRINSIC): Remove; unused.
Simplify surrounding code because of this.
* src/cksum.c (pclmul_supported):
* src/wc.c (avx2_supported):
Use __builtin_cpu_supports instead of doing it by hand.
Simplify surrounding code because of this.
---
NEWS | 3 +++
configure.ac | 67 ++++++++--------------------------------------------
src/cksum.c | 26 +++++---------------
src/wc.c | 50 +++++----------------------------------
4 files changed, 25 insertions(+), 121 deletions(-)
diff --git a/NEWS b/NEWS
index 7df9ff5b0..3350f9871 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ GNU coreutils NEWS -*- outline -*-
'pr --length=1 --double-space' no longer enters an infinite loop.
[This bug was present in "the beginning".]
+ 'wc -l' no longer crashes on x86 Linux kernels that disable XSAVE YMM.
+ [bug introduced in coreutils-9.0]
+
** Changes in behavior
'cp -v' and 'mv -v' will no longer output a message for each file skipped
diff --git a/configure.ac b/configure.ac
index 520de8184..48dea9d01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -548,27 +548,6 @@ CFLAGS=$ac_save_CFLAGS
LDFLAGS=$ac_save_LDFLAGS
ac_c_werror_flag=$cu_save_c_werror_flag
-AC_MSG_CHECKING([if __get_cpuid available])
-AC_LINK_IFELSE(
- [AC_LANG_SOURCE([[
- #include <cpuid.h>
-
- int
- main (void)
- {
- unsigned int eax, ebx, ecx, edx;
- __get_cpuid (1, &eax, &ebx, &ecx, &edx);
- return 1;
- }
- ]])
- ],[
- AC_MSG_RESULT([yes])
- AC_DEFINE([HAVE_CPUID], [1], [__get_cpuid available])
- cpuid_exists=yes
- ],[
- AC_MSG_RESULT([no])
- ])
-
ac_save_CFLAGS=$CFLAGS
CFLAGS="-mavx -mpclmul $CFLAGS"
AC_MSG_CHECKING([if pclmul intrinsic exists])
@@ -582,46 +561,23 @@ AC_COMPILE_IFELSE(
__m128i a, b;
a = _mm_clmulepi64_si128 (a, b, 0x00);
a = _mm_shuffle_epi8 (a, b);
- return 1;
+ return __builtin_cpu_supports ("pclmul");
}
]])
],[
- AC_MSG_RESULT([yes])
- AC_DEFINE([HAVE_PCLMUL_INTRINSIC], [1], [pclmul intrinsic exists])
pclmul_intrinsic_exists=yes
],[
- AC_MSG_RESULT([no])
+ pclmul_intrinsic_exists=no
])
-if test "x$cpuid_exists" = "xyes" &&
- test "x$pclmul_intrinsic_exists" = "xyes"; then
+AC_MSG_RESULT([$pclmul_intrinsic_exists])
+if test $pclmul_intrinsic_exists = yes; then
AC_DEFINE([USE_PCLMUL_CRC32], [1],
[CRC32 calculation by pclmul hardware instruction enabled])
fi
AM_CONDITIONAL([USE_PCLMUL_CRC32],
- [test "x$cpuid_exists" = "xyes" &&
- test "x$pclmul_intrinsic_exists" = "xyes"])
+ [test $pclmul_intrinsic_exists = yes])
CFLAGS=$ac_save_CFLAGS
-AC_MSG_CHECKING([if __get_cpuid_count exists])
-AC_LINK_IFELSE(
- [AC_LANG_SOURCE([[
- #include <cpuid.h>
-
- int
- main (void)
- {
- unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- __get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx);
- return 1;
- }
- ]])
- ],[
- AC_MSG_RESULT([yes])
- get_cpuid_count_exists=yes
- ],[
- AC_MSG_RESULT([no])
- ])
-
CFLAGS="-mavx2 $CFLAGS"
AC_MSG_CHECKING([if avx2 intrinstics exists])
AC_COMPILE_IFELSE(
@@ -633,23 +589,20 @@ AC_COMPILE_IFELSE(
{
__m256i a, b;
a = _mm256_sad_epu8 (a, b);
- return 1;
+ return __builtin_cpu_supports ("avx2");
}
]])
],[
- AC_MSG_RESULT([yes])
- AC_DEFINE([HAVE_AVX2_INTRINSIC], [1], [avx2 intrinsics exists])
avx2_intrinsic_exists=yes
],[
- AC_MSG_RESULT([no])
+ avx2_intrinsic_exists=no
])
-if test "x$get_cpuid_count_exists" = "xyes" &&
- test "x$avx2_intrinsic_exists" = "xyes"; then
+AC_MSG_RESULT([$avx2_intrinsic_exists])
+if test $avx2_intrinsic_exists = yes; then
AC_DEFINE([USE_AVX2_WC_LINECOUNT], [1], [Counting lines with AVX2 enabled])
fi
AM_CONDITIONAL([USE_AVX2_WC_LINECOUNT],
- [test "x$get_cpuid_count_exists" = "xyes" &&
- test "x$avx2_intrinsic_exists" = "xyes"])
+ [test $avx2_intrinsic_exists = yes])
CFLAGS=$ac_save_CFLAGS
diff --git a/src/cksum.c b/src/cksum.c
index 85afab0ac..631ac3449 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -160,29 +160,15 @@ static bool
pclmul_supported (void)
{
# if USE_PCLMUL_CRC32
- unsigned int eax = 0;
- unsigned int ebx = 0;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- if (! __get_cpuid (1, &eax, &ebx, &ecx, &edx))
- {
- if (cksum_debug)
- error (0, 0, "%s", _("failed to get cpuid"));
- return false;
- }
-
- if (! (ecx & bit_PCLMUL) || ! (ecx & bit_AVX))
- {
- if (cksum_debug)
- error (0, 0, "%s", _("pclmul support not detected"));
- return false;
- }
+ bool pclmul_enabled = 0 < __builtin_cpu_supports ("pclmul");
if (cksum_debug)
- error (0, 0, "%s", _("using pclmul hardware support"));
+ error (0, 0, "%s",
+ (pclmul_enabled
+ ? _("using pclmul hardware support")
+ : _("pclmul support not detected")));
- return true;
+ return pclmul_enabled;
# else
if (cksum_debug)
error (0, 0, "%s", _("using generic hardware support"));
diff --git a/src/wc.c b/src/wc.c
index becceda98..3708d0b8f 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -157,52 +157,14 @@ static enum total_type total_mode = total_auto;
static bool
avx2_supported (void)
{
- unsigned int eax = 0;
- unsigned int ebx = 0;
- unsigned int ecx = 0;
- unsigned int edx = 0;
- bool getcpuid_ok = false;
- bool avx_enabled = false;
-
- if (__get_cpuid (1, &eax, &ebx, &ecx, &edx))
- {
- getcpuid_ok = true;
- if (ecx & bit_OSXSAVE)
- avx_enabled = true; /* Support is not disabled. */
- }
-
-
- if (avx_enabled)
- {
- eax = ebx = ecx = edx = 0;
- if (! __get_cpuid_count (7, 0, &eax, &ebx, &ecx, &edx))
- getcpuid_ok = false;
- else
- {
- if (! (ebx & bit_AVX2))
- avx_enabled = false; /* Hardware doesn't support it. */
- }
- }
+ bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
+ if (debug)
+ error (0, 0, (avx_enabled
+ ? _("using avx2 hardware support")
+ : _("avx2 support not detected")));
- if (! getcpuid_ok)
- {
- if (debug)
- error (0, 0, "%s", _("failed to get cpuid"));
- return false;
- }
- else if (! avx_enabled)
- {
- if (debug)
- error (0, 0, "%s", _("avx2 support not detected"));
- return false;
- }
- else
- {
- if (debug)
- error (0, 0, "%s", _("using avx2 hardware support"));
- return true;
- }
+ return avx_enabled;
}
#endif
--
2.40.1