On 11/09/2025 00:31, Collin Funk wrote:
Pádraig Brady <[email protected]> writes:

This is useful to give better test coverage at least,
and may be useful for users who already may have
GLIBC_TUNABLES defined to tune their environment,
avoiding CPU throttling for example.

Probably worth documenting this behavior.

Although it is hard to imagine that many people use the glibc.cpu
tunables to disable capabilities that their hardware supports, one might
be surprised that Coreutils checks them unless it is documented.

I might do a "Hardware acceleration" section in the texinfo that
documents the supported accelerations in cksum, wc, sha*sum etc,
the configure options (--with-linux-crypto, --with-openssl).
I could also mention there the honored env vars.
Also, worth mentioning that the OpenSSL implementations of digests won't
respect this.
OpenSSL has its own settings (OPENSSL_ia32cap):

  $ truncate -s1G file.big

  $ time cksum -a sha1 file.big
  real  0m1.362s

  $ export OPENSSL_ia32cap="0x0"  # Disable all CPU features
  $ time cksum -a sha1 file.big
  real  0m1.942s

You'd be surprised at how useful these settings are.
At a previous job most services had these GLIBC and OPENSSL
environment settings at least, to avoid AVX512 induced CPU throttling,
which was worth many $

I realized the compiler could deduce the feature->HWCAP mapping at compile time,
so I rearranged a bit in the attached.

cheers,
Padraig
From 69655f0ea80941cd41d95e14068da386f5796d56 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <[email protected]>
Date: Wed, 10 Sep 2025 22:56:33 +0100
Subject: [PATCH] cksum,wc: support disabling hardware acceleration at runtime

This is useful to give better test coverage at least,
and may be useful for users who already may have
GLIBC_TUNABLES defined to tune their environment,
avoiding CPU throttling for example.

* src/cpu-supports.h: A new header that provides cpu_supports(),
which checks that the GLIBC_TUNABLES environment variable allows
the hardware feature before checking with __builtin_cpu_supports().
* src/cksum.c: Use cpu_supports() rather than __builtin_cpu_supports().
* src/wc.c: Likewise.
* src/local.mk: Reference the new header.
* tests/cksum/cksum.sh: Adjust to testing all implementations.
* tests/wc/wc-cpu.sh: A new test to do likewise.
* tests/local.mk: Reference the new wc test.
---
 src/cksum.c          | 26 ++++++++++------
 src/cpu-supports.h   | 73 ++++++++++++++++++++++++++++++++++++++++++++
 src/local.mk         |  1 +
 src/wc.c             |  3 +-
 tests/cksum/cksum.sh | 23 ++++++++++----
 tests/local.mk       |  1 +
 tests/wc/wc-cpu.sh   | 34 +++++++++++++++++++++
 7 files changed, 144 insertions(+), 17 deletions(-)
 create mode 100644 src/cpu-supports.h
 create mode 100755 tests/wc/wc-cpu.sh

diff --git a/src/cksum.c b/src/cksum.c
index 310986912..62fa4e4be 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -134,6 +134,7 @@ main (void)
 # endif
 
 # include "crc.h"
+# include "cpu-supports.h"
 
 /* Number of bytes to read at once.  */
 # define BUFLEN (1 << 16)
@@ -143,9 +144,9 @@ typedef bool (*cksum_fp_t) (FILE *, uint_fast32_t *, uintmax_t *);
 static cksum_fp_t
 pclmul_supported (void)
 {
-# if USE_PCLMUL_CRC32 || GL_CRC_X86_64_PCLMUL
-  bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul")
-                         && 0 < __builtin_cpu_supports ("avx"));
+# if USE_PCLMUL_CRC32
+  bool pclmul_enabled = (cpu_supports ("avx")
+                         && cpu_supports ("pclmul"));
   if (cksum_debug)
     error (0, 0, "%s",
            (pclmul_enabled
@@ -165,8 +166,8 @@ avx2_supported (void)
      the avx512 version, but it implies that the avx2 version
      is supported  */
 # if USE_AVX2_CRC32
-  bool avx2_enabled = (0 < __builtin_cpu_supports ("vpclmulqdq")
-                       && 0 < __builtin_cpu_supports ("avx2"));
+  bool avx2_enabled = (cpu_supports ("avx2")
+                       && cpu_supports ("vpclmulqdq"));
   if (cksum_debug)
     error (0, 0, "%s",
            (avx2_enabled
@@ -186,9 +187,10 @@ avx512_supported (void)
      mavx512f for most of the avx512 functions we're using
      mavx512bw for byte swapping  */
 # if USE_AVX512_CRC32
-  bool avx512_enabled = (0 < __builtin_cpu_supports ("vpclmulqdq")
-                         && 0 < __builtin_cpu_supports ("avx512bw")
-                         && 0 < __builtin_cpu_supports ("avx512f"));
+  bool avx512_enabled = (cpu_supports ("avx512f")
+                         && cpu_supports ("avx512bw")
+                         && cpu_supports ("vpclmulqdq"));
+
   if (cksum_debug)
     error (0, 0, "%s",
            (avx512_enabled
@@ -206,7 +208,8 @@ vmull_supported (void)
 {
   /* vmull for multiplication  */
 # if USE_VMULL_CRC32
-  bool vmull_enabled = (getauxval (AT_HWCAP) & HWCAP_PMULL) > 0;
+  bool vmull_enabled = (cpu_may_support ("pmull")
+                        && (getauxval (AT_HWCAP) & HWCAP_PMULL) > 0);
   if (cksum_debug)
     error (0, 0, "%s",
            (vmull_enabled
@@ -325,7 +328,10 @@ crc32b_sum_stream (FILE *stream, void *resstream, uintmax_t *reslen)
 
 # if GL_CRC_X86_64_PCLMUL
   if (cksum_debug)
-    (void) pclmul_supported ();
+    error (0, 0, "%s",
+           (__builtin_cpu_supports ("pclmul")  /* Match gnulib.  */
+            ? _("using pclmul hardware support")
+            : _("pclmul support not detected")));
 # endif
 
   while ((bytes_read = fread (buf, 1, BUFLEN, stream)) > 0)
diff --git a/src/cpu-supports.h b/src/cpu-supports.h
new file mode 100644
index 000000000..44580ea4a
--- /dev/null
+++ b/src/cpu-supports.h
@@ -0,0 +1,73 @@
+#include <assert.h>
+
+/* cpu_may_support: tunables allow F.  cpu_supports: and the CPU has F.  */
+#define cpu_may_support(f) hwcap_allowed (gcc_feature_to_glibc_hwcap (f))
+#define cpu_supports(f) (cpu_may_support (f) && __builtin_cpu_supports (f))
+
+/* Map a GCC feature name to the glibc.cpu.hwcaps token that disables it.
+   Return nullptr when FEATURE has no mapping on this architecture.  */
+ATTRIBUTE_PURE
+static inline char const*
+gcc_feature_to_glibc_hwcap (char const* feature)
+{
+  char const* hwcap = nullptr;
+  if (0);
+#if defined __x86_64__
+  else if (STREQ (feature, "avx"))          hwcap = "-AVX";
+  else if (STREQ (feature, "avx2"))         hwcap = "-AVX2";
+  else if (STREQ (feature, "avx512bw"))     hwcap = "-AVX512BW";
+  else if (STREQ (feature, "avx512f"))      hwcap = "-AVX512F";
+  else if (STREQ (feature, "pclmul"))       hwcap = "-PCLMULQDQ";
+  else if (STREQ (feature, "vpclmulqdq"))   hwcap = "-VPCLMULQDQ";
+#elif defined __aarch64__
+  else if (STREQ (feature, "pmull"))        hwcap = "-PMULL";
+#endif
+  return hwcap;
+}
+
+/* Support GLIBC's interface to disable features using:
+    export GLIBC_TUNABLES=glibc.cpu.hwcaps=-AVX512F,-AVX2,-AVX,-PMULL
+   Return true if GLIBC_HWCAP is null or not listed there, false if listed.  */
+static inline bool
+hwcap_allowed (char const* glibc_hwcap)
+{
+  if (! glibc_hwcap)
+    return true;
+
+  /* Match how GLIBC parses tunables as indicated with:
+     GLIBC_TUNABLES=glibc.cpu.hwcaps=... ld.so --list-tunables | grep hwcaps  */
+  static char const *GLIBC_TUNABLES;
+  if (! GLIBC_TUNABLES)
+    { /* Cache the last glibc.cpu.hwcaps= value, or empty if none.  */
+      if ((GLIBC_TUNABLES = getenv ("GLIBC_TUNABLES")))
+        {
+          char const *tunables_start = GLIBC_TUNABLES;
+          char const *last_hwcaps;
+          while ((last_hwcaps = strstr (GLIBC_TUNABLES, "glibc.cpu.hwcaps=")))
+            GLIBC_TUNABLES = last_hwcaps + sizeof "glibc.cpu.hwcaps=" - 1;
+          if (GLIBC_TUNABLES == tunables_start)  /* No match.  */
+            GLIBC_TUNABLES = "";
+        }
+      else
+        GLIBC_TUNABLES = "";
+    }
+
+  assert (GLIBC_TUNABLES);
+
+  if (! *GLIBC_TUNABLES)
+    return true;
+
+  char const *sentinel = strchr (GLIBC_TUNABLES, ':');
+  if (! sentinel)
+    sentinel = GLIBC_TUNABLES + strlen (GLIBC_TUNABLES);
+  char const *cap = GLIBC_TUNABLES;
+  while ((cap = strstr (cap, glibc_hwcap)) && cap < sentinel)
+    { /* Reject partial matches, e.g. -AVX within -AVX2.  */
+      cap += strlen (glibc_hwcap);
+      if (*cap == ',' || *cap == ':' || *cap == '\0')
+        return false;  /* Feature disabled.  */
+      /* glibc hwcaps can't have '-' in name so ok to search from here. */
+    }
+
+  return true;
+}
diff --git a/src/local.mk b/src/local.mk
index c7c77a7c9..09b3efdf1 100644
--- a/src/local.mk
+++ b/src/local.mk
@@ -43,6 +43,7 @@ noinst_HEADERS =		\
   src/chown-core.h		\
   src/copy.h			\
   src/cp-hash.h			\
+  src/cpu-supports.h		\
   src/dircolors.h		\
   src/expand-common.h		\
   src/find-mount-point.h	\
diff --git a/src/wc.c b/src/wc.c
index 268b947bb..5f974e1c0 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -33,6 +33,7 @@
 #include <xbinary-io.h>
 
 #include "system.h"
+#include "cpu-supports.h"
 #include "ioblksize.h"
 #include "wc.h"
 
@@ -133,7 +134,7 @@ static enum total_type total_mode = total_auto;
 static bool
 avx2_supported (void)
 {
-  bool avx_enabled = 0 < __builtin_cpu_supports ("avx2");
+  bool avx_enabled = cpu_supports ("avx2");
 
   if (debug)
     error (0, 0, (avx_enabled
diff --git a/tests/cksum/cksum.sh b/tests/cksum/cksum.sh
index 83f61ad2f..e216183c6 100755
--- a/tests/cksum/cksum.sh
+++ b/tests/cksum/cksum.sh
@@ -22,15 +22,26 @@ print_ver_ cksum printf
 
 returns_ 1 cksum missing 2> /dev/null || fail=1
 
+GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX512F,-AVX2,-AVX,-PMULL' \
+ cksum --debug /dev/null 2>debug || fail=1
+grep 'using.*hardware support' debug && fail=1
+
 # Pass in expected crc and crc32b for file "in"
 # Sets fail=1 upon failure
 crc_check() {
-  for crct in crc crc32b; do
-    cksum -a $crct in > out || fail=1
-    case "$crct" in crc) crce="$1";; crc32b) crce="$2";; esac
-    size=$(stat -c %s in) || framework_failure_
-    printf '%s\n' "$crce $size in" > exp || framework_failure_
-    compare exp out || fail=1
+  TUNABLE_DISABLE='glibc.cpu.hwcaps='
+  for DHW in NONE AVX512F AVX2 AVX PMULL; do
+    TUNABLE_DISABLE="$TUNABLE_DISABLE-$DHW,"
+    for crct in crc crc32b; do
+      GLIBC_TUNABLES="$TUNABLE_DISABLE" \
+       cksum -a $crct in || fail=1
+      GLIBC_TUNABLES="$TUNABLE_DISABLE" \
+       cksum -a $crct in > out || fail=1
+      case "$crct" in crc) crce="$1";; crc32b) crce="$2";; esac
+      size=$(stat -c %s in) || framework_failure_
+      printf '%s\n' "$crce $size in" > exp || framework_failure_
+      compare exp out || fail=1
+    done
   done
 }
 
diff --git a/tests/local.mk b/tests/local.mk
index 885787c3a..67a919e84 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -283,6 +283,7 @@ all_tests =					\
   tests/cut/cut.pl				\
   tests/cut/cut-huge-range.sh			\
   tests/wc/wc.pl				\
+  tests/wc/wc-cpu.sh				\
   tests/wc/wc-files0-from.pl			\
   tests/wc/wc-files0.sh				\
   tests/wc/wc-nbsp.sh				\
diff --git a/tests/wc/wc-cpu.sh b/tests/wc/wc-cpu.sh
new file mode 100755
index 000000000..725817a7c
--- /dev/null
+++ b/tests/wc/wc-cpu.sh
@@ -0,0 +1,34 @@
+#!/bin/sh
+# Ensure cpu specific code operates correctly
+
+# Copyright (C) 2025 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ wc
+
+GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2' \
+ wc --debug /dev/null 2>debug || fail=1
+grep 'using.*hardware support' debug && fail=1
+
+lines=$(shuf -i 0-1000 | head -n1)  || framework_failure_
+seq 1000 | head -n "$lines" > lines || framework_failure_
+
+wc_accelerated=$(wc -l < lines) || fail=1
+wc_base=$(GLIBC_TUNABLES='glibc.cpu.hwcaps=-AVX2' wc -l < lines) || fail=1
+
+test "$wc_accelerated" = "$wc_base" || fail=1
+
+Exit $fail
-- 
2.50.1

Reply via email to