On 6/14/23 03:46, Pádraig Brady wrote:
Paul you removed the "avx" check from cksum.c. Was that intended?
No, it's a typo I introduced. Thanks for catching that. Fixed in the
first attached patch.
While looking into this I noticed a couple of other cleanups, fixed in
the other attached patches.
I installed these into coreutils on Savannah.
From 7814596fa91a07fb2f1d0972f93f26de8a4ad547 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:13:35 -0700
Subject: [PATCH 1/3] cksum: fix bug in check for cksum_pclmul
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This fixes a typo in the previous patch.
Problem reported by Pádraig Brady <https://bugs.gnu.org/64058#11>.
* src/cksum.c (pclmul_supported): Also require AVX support
to use cksum_pclmul.
---
src/cksum.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/cksum.c b/src/cksum.c
index 631ac3449..e935ba75c 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -160,7 +160,8 @@ static bool
pclmul_supported (void)
{
# if USE_PCLMUL_CRC32
- bool pclmul_enabled = 0 < __builtin_cpu_supports ("pclmul");
+ bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul")
+ && 0 < __builtin_cpu_supports ("avx"));
if (cksum_debug)
error (0, 0, "%s",
--
2.40.1
From 4ac941565fc1f7c1eb7954302f2ec20435fdf34c Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:18:42 -0700
Subject: [PATCH 2/3] =?UTF-8?q?cksum,wc:=20don=E2=80=99t=20include=20<cpui?=
=?UTF-8?q?d.h>?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* src/cksum.c [!CRCTAB && USE_PCLMUL_CRC32]:
* src/wc.c [USE_AVX2_WC_LINECOUNT]:
Don’t include <cpuid.h>; no longer needed.
---
src/cksum.c | 4 +---
src/wc.c | 3 ---
2 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/src/cksum.c b/src/cksum.c
index e935ba75c..352a0ba3a 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -141,9 +141,7 @@ main (void)
# include "error.h"
# include "cksum.h"
-# if USE_PCLMUL_CRC32
-# include "cpuid.h"
-# else
+# if !USE_PCLMUL_CRC32
# define cksum_pclmul cksum_slice8
# endif /* USE_PCLMUL_CRC32 */
diff --git a/src/wc.c b/src/wc.c
index 3708d0b8f..ebe83af4d 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -38,9 +38,6 @@
#include "safe-read.h"
#include "stat-size.h"
#include "xbinary-io.h"
-#ifdef USE_AVX2_WC_LINECOUNT
-# include <cpuid.h>
-#endif
#if !defined iswspace && !HAVE_ISWSPACE
# define iswspace(wc) \
--
2.40.1
From f780a85985f5b069ba8597aaeac49eb74864926a Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Wed, 14 Jun 2023 14:52:37 -0700
Subject: [PATCH 3/3] cksum,wc: clean up hw capability checking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* src/cksum.c (cksum_pclmul) [!CRCTAB && !USE_PCLMUL_CRC32]:
Remove macro.
(cksum_fp): No longer file-scope.
(pclmul_supported): Define only if USE_PCLMUL_CRC32.
This omits the debug output "using generic hardware support"
for simplicity and consistency with wc’s output.
(crc_sum_stream) [!USE_PCLMUL_32]: No need for static function pointer.
* src/wc.c (wc_lines_p) [USE_AVX2_WC_LINECOUNT]: No longer file-scope.
(wc) [USE_AVX2_WC_LINECOUNT]: Check for avx2 support at most once,
which was surely the code’s original intent.
(wc) [!USE_AVX2_WC_LINECOUNT]: No need for static function pointer.
---
src/cksum.c | 29 ++++++++---------------------
src/wc.c | 14 ++++++--------
2 files changed, 14 insertions(+), 29 deletions(-)
diff --git a/src/cksum.c b/src/cksum.c
index 352a0ba3a..26bb29bdb 100644
--- a/src/cksum.c
+++ b/src/cksum.c
@@ -141,23 +141,14 @@ main (void)
# include "error.h"
# include "cksum.h"
-# if !USE_PCLMUL_CRC32
-# define cksum_pclmul cksum_slice8
-# endif /* USE_PCLMUL_CRC32 */
/* Number of bytes to read at once. */
# define BUFLEN (1 << 16)
-
-static bool
-cksum_slice8 (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out);
-static bool
- (*cksum_fp)(FILE *, uint_fast32_t *, uintmax_t *);
-
+# if USE_PCLMUL_CRC32
static bool
pclmul_supported (void)
{
-# if USE_PCLMUL_CRC32
bool pclmul_enabled = (0 < __builtin_cpu_supports ("pclmul")
&& 0 < __builtin_cpu_supports ("avx"));
@@ -168,12 +159,8 @@ pclmul_supported (void)
: _("pclmul support not detected")));
return pclmul_enabled;
-# else
- if (cksum_debug)
- error (0, 0, "%s", _("using generic hardware support"));
- return false;
-# endif /* USE_PCLMUL_CRC32 */
}
+# endif /* USE_PCLMUL_CRC32 */
static bool
cksum_slice8 (FILE *fp, uint_fast32_t *crc_out, uintmax_t *length_out)
@@ -238,13 +225,13 @@ crc_sum_stream (FILE *stream, void *resstream, uintmax_t *length)
uintmax_t total_bytes = 0;
uint_fast32_t crc = 0;
+# if USE_PCLMUL_CRC32
+ static bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *);
if (! cksum_fp)
- {
- if (pclmul_supported ())
- cksum_fp = cksum_pclmul;
- else
- cksum_fp = cksum_slice8;
- }
+ cksum_fp = pclmul_supported () ? cksum_pclmul : cksum_slice8;
+#else
+ bool (*cksum_fp) (FILE *, uint_fast32_t *, uintmax_t *) = cksum_slice8;
+#endif
if (! cksum_fp (stream, &crc, &total_bytes))
return -1;
diff --git a/src/wc.c b/src/wc.c
index ebe83af4d..30214655c 100644
--- a/src/wc.c
+++ b/src/wc.c
@@ -54,18 +54,12 @@
/* Size of atomic reads. */
#define BUFFER_SIZE (16 * 1024)
-static bool
-wc_lines (char const *file, int fd, uintmax_t *lines_out,
- uintmax_t *bytes_out);
#ifdef USE_AVX2_WC_LINECOUNT
/* From wc_avx2.c */
extern bool
wc_lines_avx2 (char const *file, int fd, uintmax_t *lines_out,
uintmax_t *bytes_out);
#endif
-static bool
-(*wc_lines_p) (char const *file, int fd, uintmax_t *lines_out,
- uintmax_t *bytes_out) = wc_lines;
static bool debug;
@@ -441,8 +435,12 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
else if (!count_chars && !count_complicated)
{
#ifdef USE_AVX2_WC_LINECOUNT
- if (avx2_supported ())
- wc_lines_p = wc_lines_avx2;
+ static bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *);
+ if (!wc_lines_p)
+ wc_lines_p = avx2_supported () ? wc_lines_avx2 : wc_lines;
+#else
+ bool (*wc_lines_p) (char const *, int, uintmax_t *, uintmax_t *)
+ = wc_lines;
#endif
/* Use a separate loop when counting only lines or lines and bytes --
--
2.40.1