On 6/14/23 03:46, Pádraig Brady wrote:

Paul you removed the "avx" check from cksum.c. Was that intended?

No, it's a typo I introduced. Thanks for catching that. Fixed in the first attached patch.

While looking into this I noticed a couple of other cleanups, fixed in the other attached patches.

I installed these into coreutils on Savannah.
From 7814596fa91a07fb2f1d0972f93f26de8a4ad547 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:13:35 -0700
Subject: [PATCH 1/3] cksum: fix bug in check for cksum_pclmul
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This fixes a typo in the previous patch.
Problem reported by Pádraig Brady <https://bugs.gnu.org/64058#11>.
* src/cksum.c (pclmul_supported): Also require AVX support
to use cksum_pclmul.
---
 src/cksum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/cksum.c b/src/cksum.c
index 631ac3449..e935ba75c 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -160,7 +160,8 @@ static bool
 pclmul_supported (void)
 {
 # if USE_PCLMUL_CRC32
-  bool pclmul_enabled = 0 < __builtin_cpu_supports ("pclmul");
+  bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul")
+                         && 0 < __builtin_cpu_supports ("avx"));
 
   if (cksum_debug)
     error (0, 0, "%s",
-- 
2.40.1

From 4ac941565fc1f7c1eb7954302f2ec20435fdf34c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:18:42 -0700
Subject: [PATCH 2/3] =?UTF-8?q?cksum,wc:=20don=E2=80=99t=20include=20<cpui?=
 =?UTF-8?q?d.h>?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/cksum.c [!CRCTAB && USE_PCLMUL_CRC32]:
* src/wc.c [USE_AVX2_WC_LINECOUNT]:
Don’t include <cpuid.h>; no longer needed.
---
 src/cksum.c | 4 +---
 src/wc.c    | 3 ---
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/cksum.c b/src/cksum.c
index e935ba75c..352a0ba3a 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -141,9 +141,7 @@ main (void)
 # include "error.h"
 
 # include "cksum.h"
-# if USE_PCLMUL_CRC32
-#  include "cpuid.h"
-# else
+# if !USE_PCLMUL_CRC32
 #  define cksum_pclmul cksum_slice8
 # endif /* USE_PCLMUL_CRC32 */
 
diff --git a/src/wc.c b/src/wc.c
index 3708d0b8f..ebe83af4d 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -38,9 +38,6 @@
 #include "safe-read.h"
 #include "stat-size.h"
 #include "xbinary-io.h"
-#ifdef USE_AVX2_WC_LINECOUNT
-# include <cpuid.h>
-#endif
 
 #if !defined iswspace && !HAVE_ISWSPACE
 # define iswspace(wc) \
-- 
2.40.1

From f780a85985f5b069ba8597aaeac49eb74864926a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:52:37 -0700
Subject: [PATCH 3/3] cksum,wc: clean up hw capability checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* src/cksum.c (cksum_pclmul) [!CRCTAB && !USE_PCLMUL_CRC32]:
Remove macro.
(cksum_fp): No longer file-scope.
(pclmul_supported): Define only if USE_PCLMUL_CRC32.
This omits the debug output "using generic hardware support"
for simplicity and consistency with wc’s output.
(crc_sum_stream) [!USE_PCLMUL_32]: No need for static function pointer.
* src/wc.c (wc_lines_p) [USE_AVX2_WC_LINECOUNT]: No longer file-scope.
(wc) [USE_AVX2_WC_LINECOUNT]: Check for avx2 support at most once,
which was surely the code’s original intent.
(wc) [!USE_AVX2_WC_LINECOUNT]: No need for static function pointer.
---
 src/cksum.c | 29 ++++++++---------------------
 src/wc.c    | 14 ++++++--------
 2 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/src/cksum.c b/src/cksum.c
index 352a0ba3a..26bb29bdb 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -141,23 +141,14 @@ main (void)
 # include "error.h"
 
 # include "cksum.h"
-# if !USE_PCLMUL_CRC32
-#  define cksum_pclmul cksum_slice8
-# endif /* USE_PCLMUL_CRC32 */
 
 /* Number of bytes to read at once.  */
 # define BUFLEN (1 << 16)
 
-
-static bool
-cksum_slice8 (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out);
-static bool
-  (*cksum_fp)(FILE *, uint_fast32_t *, uintmax_t *);
-
+# if USE_PCLMUL_CRC32
 static bool
 pclmul_supported (void)
 {
-# if USE_PCLMUL_CRC32
   bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul")
                          && 0 < __builtin_cpu_supports ("avx"));
 
@@ -168,12 +159,8 @@ pclmul_supported (void)
             : _("pclmul support not detected")));
 
   return pclmul_enabled;
-# else
-  if (cksum_debug)
-    error (0, 0, "%s", _("using generic hardware support"));
-  return false;
-# endif /* USE_PCLMUL_CRC32 */
 }
+# endif /* USE_PCLMUL_CRC32 */
 
 static bool
 cksum_slice8 (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out)
@@ -238,13 +225,13 @@ crc_sum_stream (FILE *stream, void *resstream, uintmax_t *length)
   uintmax_t total_bytes = 0;
   uint_fast32_t crc = 0;
 
+# if USE_PCLMUL_CRC32
+  static bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *);
   if (! cksum_fp)
-    {
-       if (pclmul_supported ())
-         cksum_fp = cksum_pclmul;
-       else
-         cksum_fp = cksum_slice8;
-    }
+    cksum_fp = pclmul_supported () ? cksum_pclmul : cksum_slice8;
+#else
+  bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *) = cksum_slice8;
+#endif
 
   if (! cksum_fp (stream, &crc, &total_bytes))
     return -1;
diff --git a/src/wc.c b/src/wc.c
index ebe83af4d..30214655c 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -54,18 +54,12 @@
 /* Size of atomic reads. */
 #define BUFFER_SIZE (16 * 1024)
 
-static bool
-wc_lines (char const *file, int fd, uintmax_t *lines_out,
-          uintmax_t *bytes_out);
 #ifdef USE_AVX2_WC_LINECOUNT
 /* From wc_avx2.c */
 extern bool
 wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
                uintmax_t *bytes_out);
 #endif
-static bool
-(*wc_lines_p) (char const *file, int fd, uintmax_t *lines_out,
-                uintmax_t *bytes_out) = wc_lines;
 
 static bool debug;
 
@@ -441,8 +435,12 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
   else if (!count_chars && !count_complicated)
     {
 #ifdef USE_AVX2_WC_LINECOUNT
-      if (avx2_supported ())
-        wc_lines_p = wc_lines_avx2;
+      static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
+      if (!wc_lines_p)
+        wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
+#else
+      bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
+        = wc_lines;
 #endif
 
       /* Use a separate loop when counting only lines or lines and bytes --
-- 
2.40.1

Reply via email to