On 2/12/24 16:49, Patrick Palka wrote:
Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk?
OK.
-- >8 --
The current implementation of bytes::calc_crc computes the checksum one
byte at a time, and turns out to be quite slow, taking 5% and 15% of
time compiling and streaming in the std module respectively. We have
a crc32_unsigned routine that handles 4 bytes at a time which could
speed up this hot function, but we also have a bundled zlib with highly
optimized crc routines that can handle up to 32 bytes at a time.
So this patch makes us use zlib's crc32 in this hot code path.
According to some perf experiments this reduces the overhead of calc_crc
from 15% of total time to 3% when streaming in the std module.
gcc/cp/ChangeLog:
* Make-lang.in (CFLAGS-cp/module.o): Add $(ZLIBINC).
* module.cc: Include <zlib.h>.
(bytes::calc_crc): Use crc32 from zlib.
(bytes_out::set_crc): Use crc32_combine from zlib.
---
gcc/cp/Make-lang.in | 2 +-
gcc/cp/module.cc | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/gcc/cp/Make-lang.in b/gcc/cp/Make-lang.in
index 630db41d87e..f153891a1ef 100644
--- a/gcc/cp/Make-lang.in
+++ b/gcc/cp/Make-lang.in
@@ -55,7 +55,7 @@ c++.serial = cc1plus$(exeext)
CFLAGS-cp/g++spec.o += $(DRIVER_DEFINES)
CFLAGS-cp/module.o += -DHOST_MACHINE=\"$(host)\" \
- -DTARGET_MACHINE=\"$(target)\"
+ -DTARGET_MACHINE=\"$(target)\" $(ZLIBINC)
# In non-release builds, use a date-related module version.
ifneq ($(DEVPHASE_c),)
diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 86e43aee542..c94f4a257de 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -233,6 +233,7 @@ Classes used:
/* This TU doesn't need or want to see the networking. */
#define CODY_NETWORKING 0
#include "mapper-client.h"
+#include <zlib.h> // for crc32, crc32_combine
#if 0 // 1 for testing no mmap
#define MAPPED_READING 0
@@ -487,10 +488,7 @@ protected:
unsigned
bytes::calc_crc (unsigned l) const
{
- unsigned crc = 0;
- for (size_t ix = 4; ix < l; ix++)
- crc = crc32_byte (crc, buffer[ix]);
- return crc;
+ return crc32 (0, (unsigned char *)buffer + 4, l - 4);
}
class elf_in;
@@ -717,7 +715,7 @@ bytes_out::set_crc (unsigned *crc_ptr)
unsigned crc = calc_crc (pos);
unsigned accum = *crc_ptr;
/* Only mix the existing *CRC_PTR if it is non-zero. */
- accum = accum ? crc32_unsigned (accum, crc) : crc;
+ accum = accum ? crc32_combine (accum, crc, pos - 4) : crc;
*crc_ptr = accum;
/* Buffer will be sufficiently aligned. */