Hi. Here's a proposed patch that uses CPUID at startup to determine whether the SSE4.2 CRC instructions are available and, if so, uses them instead of the slice-by-8 implementation (posted earlier).
A few notes: 1. GCC has included cpuid.h since 4.3.0, so I figured it was safe to use. Otherwise, it can be replaced with some inline assembly. 2. I've also used the crc32b/crc32q instructions directly rather than using ".byte" to encode the instructions; binutils versions since 2007 or so have supported them. 3. I've included the MSVC implementation mostly as an example of how to extend this to different compilers/platforms. It's written according to the documentation for MSVC intrinsics, but I have not tested it. Suggestions/improvements are welcome. -- Abhijit
diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 73c30c5..ae34876 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -37,6 +37,7 @@ #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/ps_status.h" +#include "utils/pg_crc.h" const char *progname; @@ -76,6 +77,12 @@ main(int argc, char *argv[]) argv = save_ps_display_args(argc, argv); /* + * Select the fastest available CRC32 implementation for the + * platform. + */ + pg_init_comp_crc32c(); + + /* * If supported on the current platform, set up a handler to be called if * the backend/postmaster crashes with a fatal signal or exception. */ diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h index 55934e5..c59c05b 100644 --- a/src/include/utils/pg_crc.h +++ b/src/include/utils/pg_crc.h @@ -41,7 +41,8 @@ typedef uint32 pg_crc32; -extern pg_crc32 pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len); +extern void pg_init_comp_crc32c(void); +extern pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len); /* * CRC calculation using the CRC-32C (Castagnoli) polynomial. 
diff --git a/src/port/pg_crc.c b/src/port/pg_crc.c index 2f9857b..6be17b0 100644 --- a/src/port/pg_crc.c +++ b/src/port/pg_crc.c @@ -21,6 +21,13 @@ #include "utils/pg_crc.h" #include "utils/pg_crc_tables.h" +#if defined(HAVE_CPUID_H) +#include <cpuid.h> +#elif defined(_MSC_VER) +#include <intrin.h> +#include <nmmintrin.h> +#endif + static inline uint32 bswap32(uint32 x) { #if defined(__GNUC__) || defined(__clang__) @@ -39,8 +46,8 @@ static inline uint32 bswap32(uint32 x) #define cpu_to_le32(x) x #endif -pg_crc32 -pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len) +static pg_crc32 +pg_comp_crc32c_sb8(pg_crc32 crc, const void *data, size_t len) { const unsigned char *p = data; const uint32 *p8; @@ -61,7 +68,6 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len) */ p8 = (const uint32 *) p; - while (len >= 8) { uint32 a = *p8++ ^ cpu_to_le32(crc); @@ -101,8 +107,102 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len) */ p = (const unsigned char *) p8; - while (len-- > 0) + while (len > 0) + { crc = pg_crc32c_table[0][(crc ^ *p++) & 0xFF] ^ (crc >> 8); + len--; + } + + return crc; +} +static pg_crc32 +pg_asm_crc32b(pg_crc32 crc, unsigned char data) +{ +#ifdef __GNUC__ + __asm__ ("crc32b %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data)); return crc; +#elif defined(_MSC_VER) + return _mm_crc32_u8(crc, data); +#else +#error "Don't know how to generate crc32b instruction" +#endif } + +static pg_crc32 +pg_asm_crc32q(uint64 crc, unsigned long long data) +{ +#ifdef __GNUC__ + __asm__ ("crc32q %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data)); + return crc; +#elif defined(_MSC_VER) + return _mm_crc32_u64(crc, data); +#else +#error "Don't know how to generate crc32q instruction" +#endif +} + +static pg_crc32 +pg_comp_crc32c_sse(pg_crc32 crc, const void *data, size_t len) +{ + const unsigned char *p = data; + const uint64 *p8; + + /* + * Handle initial bytes one at a time if necessary to ensure that + * the loop below starts with a 
pointer aligned to four bytes. + */ + + while (len > 0 && ((uintptr_t) p & 3)) + { + crc = pg_asm_crc32b(crc, *p++); + len--; + } + + /* + * Process eight bytes of data at a time. + */ + + p8 = (const uint64 *) p; + while (len >= 8) + { + crc = pg_asm_crc32q(crc, *p8++); + len -= 8; + } + + /* + * Handle any remaining bytes one at a time. + */ + + p = (const unsigned char *) p8; + while (len > 0) + { + crc = pg_asm_crc32b(crc, *p++); + len--; + } + + return crc; +} + +/* + * If (we can tell that) the CPU supports SSE4.2 instructions, we can + * use the CRC instruction, otherwise we fall back to slice-by-8 in + * software. + */ + +void +pg_init_comp_crc32c(void) +{ + unsigned int exx[4] = { 0, 0, 0, 0 }; + +#if defined(__GNUC__) && defined(HAVE_CPUID_H) + __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]); +#elif defined(_MSC_VER) + __cpuid(exx, 1); +#endif + + if (exx[2] & (1 << 20)) + pg_comp_crc32c = pg_comp_crc32c_sse; +} + +pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len) = pg_comp_crc32c_sb8; diff --git a/configure b/configure index 7594401..284ca6f 100755 --- a/configure +++ b/configure @@ -9195,7 +9195,7 @@ fi done -for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h +for ac_header in atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h do : as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh` ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default" diff --git a/configure.in b/configure.in index 
0dc3f18..8ab888d 100644 --- a/configure.in +++ b/configure.in @@ -1023,7 +1023,7 @@ AC_SUBST(UUID_LIBS) ## dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES -AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) +AC_CHECK_HEADERS([atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h]) # On BSD, test for net/if.h will fail unless sys/socket.h # is included first. diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index 465281c..355f5fc 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -99,6 +99,9 @@ /* Define to 1 if you have the `class' function. */ #undef HAVE_CLASS +/* Define to 1 if you have the <cpuid.h> header file. */ +#undef HAVE_CPUID_H + /* Define to 1 if you have the <crtdefs.h> header file. */ #undef HAVE_CRTDEFS_H diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32 index 05941e6..c2fe01f 100644 --- a/src/include/pg_config.h.win32 +++ b/src/include/pg_config.h.win32 @@ -78,6 +78,9 @@ /* Define to 1 if you have the `class' function. */ /* #undef HAVE_CLASS */ +/* Define to 1 if you have the <cpuid.h> header file. */ +/* #undef HAVE_CPUID_H */ + /* Define to 1 if you have the `crypt' function. */ /* #undef HAVE_CRYPT */
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers