Hi.

Here's a proposed patch that uses CPUID at startup to determine whether
the SSE4.2 CRC instructions are available and, if so, uses them instead
of the slice-by-8 implementation (posted earlier).

A few notes:

1. GCC has included cpuid.h since 4.3.0, so I figured it was safe to
   use. It can be replaced with some inline assembly otherwise.

2. I've also used the crc32b/crc32q instructions directly rather than
   using ".byte" directives to encode the instructions; binutils
   versions since 2007 or so have supported them.

3. I've included the MSVC implementation mostly as an example of how to
   extend this to different compilers/platforms. It's written according
   to the documentation for MSVC intrinsics, but I have not tested it.
   Suggestions/improvements are welcome.

-- Abhijit
diff --git a/src/backend/main/main.c b/src/backend/main/main.c
index 73c30c5..ae34876 100644
--- a/src/backend/main/main.c
+++ b/src/backend/main/main.c
@@ -37,6 +37,7 @@
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
 #include "utils/ps_status.h"
+#include "utils/pg_crc.h"
 
 
 const char *progname;
@@ -76,6 +77,12 @@ main(int argc, char *argv[])
 	argv = save_ps_display_args(argc, argv);
 
 	/*
+	 * Select the fastest available CRC32 implementation for the
+	 * platform.
+	 */
+	pg_init_comp_crc32c();
+
+	/*
 	 * If supported on the current platform, set up a handler to be called if
 	 * the backend/postmaster crashes with a fatal signal or exception.
 	 */

diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h
index 55934e5..c59c05b 100644
--- a/src/include/utils/pg_crc.h
+++ b/src/include/utils/pg_crc.h
@@ -41,7 +41,8 @@
 
 typedef uint32 pg_crc32;
 
-extern pg_crc32 pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len);
+extern void pg_init_comp_crc32c(void);
+extern pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len);
 
 /*
  * CRC calculation using the CRC-32C (Castagnoli) polynomial.

diff --git a/src/port/pg_crc.c b/src/port/pg_crc.c
index 2f9857b..6be17b0 100644
--- a/src/port/pg_crc.c
+++ b/src/port/pg_crc.c
@@ -21,6 +21,13 @@
 #include "utils/pg_crc.h"
 #include "utils/pg_crc_tables.h"
 
+#if defined(HAVE_CPUID_H)
+#include <cpuid.h>
+#elif defined(_MSC_VER)
+#include <intrin.h>
+#include <nmmintrin.h>
+#endif
+
 static inline uint32 bswap32(uint32 x)
 {
 #if defined(__GNUC__) || defined(__clang__)
@@ -39,8 +46,8 @@ static inline uint32 bswap32(uint32 x)
 #define cpu_to_le32(x) x
 #endif
 
-pg_crc32
-pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
+static pg_crc32
+pg_comp_crc32c_sb8(pg_crc32 crc, const void *data, size_t len)
 {
 	const unsigned char *p = data;
 	const uint32 *p8;
@@ -61,7 +68,6 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
 	 */
 
 	p8 = (const uint32 *) p;
-
 	while (len >= 8)
 	{
 		uint32 a = *p8++ ^ cpu_to_le32(crc);
@@ -101,8 +107,102 @@ pg_comp_crc32c(pg_crc32 crc, const void *data, size_t len)
 	 */
 
 	p = (const unsigned char *) p8;
-	while (len-- > 0)
+	while (len > 0)
+	{
 		crc = pg_crc32c_table[0][(crc ^ *p++) & 0xFF] ^ (crc >> 8);
+		len--;
+	}
+
+	return crc;
+}
 
+static pg_crc32
+pg_asm_crc32b(pg_crc32 crc, unsigned char data)
+{
+#ifdef __GNUC__
+	__asm__ ("crc32b %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data));
 	return crc;
+#elif defined(_MSC_VER)
+	return _mm_crc32_u8(crc, data);
+#else
+#error "Don't know how to generate crc32b instruction"
+#endif
 }
+
+static pg_crc32
+pg_asm_crc32q(uint64 crc, unsigned long long data)
+{
+#ifdef __GNUC__
+	__asm__ ("crc32q %[data], %[crc]\n" : [crc] "+r" (crc) : [data] "rm" (data));
+	return crc;
+#elif defined(_MSC_VER)
+	return _mm_crc32_u64(crc, data);
+#else
+#error "Don't know how to generate crc32q instruction"
+#endif
+}
+
+static pg_crc32
+pg_comp_crc32c_sse(pg_crc32 crc, const void *data, size_t len)
+{
+	const unsigned char *p = data;
+	const uint64 *p8;
+
+	/*
+	 * Handle initial bytes one at a time if necessary to ensure that
+	 * the loop below starts with a pointer aligned to four bytes.
+	 */
+
+	while (len > 0 && ((uintptr_t) p & 3))
+	{
+		crc = pg_asm_crc32b(crc, *p++);
+		len--;
+	}
+
+	/*
+	 * Process eight bytes of data at a time.
+	 */
+
+	p8 = (const uint64 *) p;
+	while (len >= 8)
+	{
+		crc = pg_asm_crc32q(crc, *p8++);
+		len -= 8;
+	}
+
+	/*
+	 * Handle any remaining bytes one at a time.
+	 */
+
+	p = (const unsigned char *) p8;
+	while (len > 0)
+	{
+		crc = pg_asm_crc32b(crc, *p++);
+		len--;
+	}
+
+	return crc;
+}
+
+/*
+ * If (we can tell that) the CPU supports SSE4.2 instructions, we can
+ * use the CRC instruction, otherwise we fall back to slice-by-8 in
+ * software.
+ */
+
+void
+pg_init_comp_crc32c(void)
+{
+	unsigned int exx[4] = { 0, 0, 0, 0 };
+
+#if defined(__GNUC__) && defined(HAVE_CPUID_H)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(_MSC_VER)
+	__cpuid(exx, 1);
+#endif
+
+	if (exx[2] & (1 << 20))
+		pg_comp_crc32c = pg_comp_crc32c_sse;
+}
+
+pg_crc32 (*pg_comp_crc32c)(pg_crc32 crc, const void *data, size_t len) = pg_comp_crc32c_sb8;

diff --git a/configure b/configure
index 7594401..284ca6f 100755
--- a/configure
+++ b/configure
@@ -9195,7 +9195,7 @@ fi
 done
 
 
-for ac_header in atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
+for ac_header in atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h
 do :
   as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"

diff --git a/configure.in b/configure.in
index 0dc3f18..8ab888d 100644
--- a/configure.in
+++ b/configure.in
@@ -1023,7 +1023,7 @@ AC_SUBST(UUID_LIBS)
 ##
 
 dnl sys/socket.h is required by AC_FUNC_ACCEPT_ARGTYPES
-AC_CHECK_HEADERS([atomic.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
+AC_CHECK_HEADERS([atomic.h cpuid.h crypt.h dld.h fp_class.h getopt.h ieeefp.h ifaddrs.h langinfo.h mbarrier.h poll.h pwd.h sys/ioctl.h sys/ipc.h sys/poll.h sys/pstat.h sys/resource.h sys/select.h sys/sem.h sys/shm.h sys/socket.h sys/sockio.h sys/tas.h sys/time.h sys/un.h termios.h ucred.h utime.h wchar.h wctype.h])
 
 # On BSD, test for net/if.h will fail unless sys/socket.h
 # is included first.

diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 465281c..355f5fc 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -99,6 +99,9 @@
 /* Define to 1 if you have the `class' function. */
 #undef HAVE_CLASS
 
+/* Define to 1 if you have the <cpuid.h> header file. */
+#undef HAVE_CPUID_H
+
 /* Define to 1 if you have the <crtdefs.h> header file. */
 #undef HAVE_CRTDEFS_H
 
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index 05941e6..c2fe01f 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -78,6 +78,9 @@
 /* Define to 1 if you have the `class' function. */
 /* #undef HAVE_CLASS */
 
+/* Define to 1 if you have the <cpuid.h> header file. */
+/* #undef HAVE_CPUID_H */
+
 /* Define to 1 if you have the `crypt' function. */
 /* #undef HAVE_CRYPT */
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to