Hi,

I started to analyze XLogInsert because it was the major bottleneck when 
creating some materialized view/cached tables/whatever.
Analyzing it I could see that content of the COMP_CRC32 macro was taking most 
of the time which isn't immediately obvious when you profile because it 
obviously doesn't show up as a separate function.
I first put it into functions to make it easier to profile. I couldn't measure 
any difference for COPY, CTAS and a simple pgbench run on 3 kinds of hardware 
(Core2, older Xeon, older Sparc systems).

I looked a bit around for faster implementations of CRC32 and found one in 
zlib. After adapting it (pg uses slightly different computation (non-
inverted)) I found that it increases the speed of the CRC32 calculation itself 
3 fold.
It does that by not only using one lookup table but four (one for each byte of 
a word). Those four calculations are independent and thus are considerably 
faster on somewhat recent hardware.
Also it does memory lookups in 4 byte steps instead of 1 byte as the pg 
version (thats only about ~8% benefit in itself).

I wrote a preliminary patch which includes both, the original implementation 
and the new one switchable via an #define.


I tested performance differences in a small number of scenarios:
- CTAS/INSERT ... SELECT (8-30%)
- COPY (3-20%)
- pgbench (no real difference unless directly after a checkpoint)

Setup:

CREATE TABLE blub (ai int, bi int, aibi int);
CREATE TABLE speedtest (ai int, bi int, aibi int);


INSERT ... SELECT:

Statement:
INSERT INTO blub SELECT a.i, b.i, a.i *b.i FROM generate_series(1, 10000) 
a(i), generate_series(1, 1000) b(i);

legacy crc:

11526.588
11406.518
11412.182
11430.245

zlib:
9977.394
9945.408
9840.907
9842.875


COPY:
Statement:
('blub' enlarged here 4 times, as otherwise the variances were to large)

COPY blub TO '/tmp/b' BINARY;
...
CHECKPOINT;TRUNCATE speedtest; COPY speedtest FROM '/tmp/b' BINARY;

legacy:
44835.840
44832.876

zlib:
39530.549
39365.109
39295.167

The performance differences are bigger if the table rows are significantly 
bigger. 

Do you think something like that is sensible? If yes, I will make it into a 
proper patch and such.

Thanks,

Andres

INSERT ... SELECT profile before patch:

    20.22%         postgres  postgres   [.] comp_crc32
     5.77%         postgres  postgres   [.] XLogInsert
     5.55%         postgres  postgres   [.] LWLockAcquire
     5.21%         postgres  [kernel.   [k] copy_user_generic_string
     4.64%         postgres  postgres   [.] LWLockRelease
     4.39%         postgres  postgres   [.] ReadBuffer_common
     2.75%         postgres  postgres   [.] heap_insert
     2.22%         postgres  libc-2.1   [.] memcpy
     2.09%         postgres  postgres   [.] UnlockReleaseBuffer
     1.85%         postgres  postgres   [.] hash_any
     1.77%         postgres  [kernel.   [k] clear_page_c
     1.69%         postgres  postgres   [.] hash_search_with_hash_value
     1.61%         postgres  postgres   [.] heapgettup_pagemode
     1.50%         postgres  postgres   [.] PageAddItem
     1.42%         postgres  postgres   [.] MarkBufferDirty
     1.28%         postgres  postgres   [.] RelationGetBufferForTuple
     1.15%         postgres  postgres   [.] ExecModifyTable
     1.06%         postgres  postgres   [.] RelationPutHeapTuple


After:

     9.97%         postgres  postgres   [.] comp_crc32
     5.95%         postgres  [kernel.   [k] copy_user_generic_string
     5.94%         postgres  postgres   [.] LWLockAcquire
     5.64%         postgres  postgres   [.] XLogInsert
     5.11%         postgres  postgres   [.] LWLockRelease
     4.63%         postgres  postgres   [.] ReadBuffer_common
     3.45%         postgres  postgres   [.] heap_insert
     2.54%         postgres  libc-2.1   [.] memcpy
     2.03%         postgres  postgres   [.] UnlockReleaseBuffer
     1.94%         postgres  postgres   [.] hash_search_with_hash_value
     1.84%         postgres  postgres   [.] hash_any
     1.73%         postgres  [kernel.   [k] clear_page_c
     1.68%         postgres  postgres   [.] PageAddItem
     1.62%         postgres  postgres   [.] heapgettup_pagemode
     1.52%         postgres  postgres   [.] RelationGetBufferForTuple
     1.47%         postgres  postgres   [.] MarkBufferDirty
     1.30%         postgres  postgres   [.] ExecModifyTable
     1.23%         postgres  postgres   [.] RelationPutHeapTuple
From f8ec18769e581cf039535730d2088466c461d8f6 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Thu, 29 Apr 2010 22:19:08 +0200
Subject: [PATCH] Preliminary patch using an improved out of line crc32 computation for
 the xlog.

---
 src/backend/access/transam/xlog.c |   66 +++++++++---------
 src/backend/utils/hash/pg_crc.c   |  142 ++++++++++++++++++++++++++++++++++++-
 src/include/utils/pg_crc.h        |    9 ++-
 3 files changed, 180 insertions(+), 37 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 992a337..ffb9fc4 100644
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
*************** begin:;
*** 700,706 ****
  		{
  			/* Simple data, just include it */
  			len += rdt->len;
! 			COMP_CRC32(rdata_crc, rdt->data, rdt->len);
  		}
  		else
  		{
--- 700,706 ----
  		{
  			/* Simple data, just include it */
  			len += rdt->len;
! 			rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len);
  		}
  		else
  		{
*************** begin:;
*** 715,721 ****
  					else if (rdt->data)
  					{
  						len += rdt->len;
! 						COMP_CRC32(rdata_crc, rdt->data, rdt->len);
  					}
  					break;
  				}
--- 715,721 ----
  					else if (rdt->data)
  					{
  						len += rdt->len;
! 						rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len);
  					}
  					break;
  				}
*************** begin:;
*** 732,738 ****
  					else if (rdt->data)
  					{
  						len += rdt->len;
! 						COMP_CRC32(rdata_crc, rdt->data, rdt->len);
  					}
  					break;
  				}
--- 732,738 ----
  					else if (rdt->data)
  					{
  						len += rdt->len;
! 						rdata_crc = comp_crc32(rdata_crc, rdt->data, rdt->len);
  					}
  					break;
  				}
*************** begin:;
*** 757,781 ****
  			BkpBlock   *bkpb = &(dtbuf_xlg[i]);
  			char	   *page;
  
! 			COMP_CRC32(rdata_crc,
! 					   (char *) bkpb,
! 					   sizeof(BkpBlock));
  			page = (char *) BufferGetBlock(dtbuf[i]);
  			if (bkpb->hole_length == 0)
  			{
! 				COMP_CRC32(rdata_crc,
! 						   page,
! 						   BLCKSZ);
  			}
  			else
  			{
  				/* must skip the hole */
! 				COMP_CRC32(rdata_crc,
! 						   page,
! 						   bkpb->hole_offset);
! 				COMP_CRC32(rdata_crc,
! 						   page + (bkpb->hole_offset + bkpb->hole_length),
! 						   BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
  			}
  		}
  	}
--- 757,781 ----
  			BkpBlock   *bkpb = &(dtbuf_xlg[i]);
  			char	   *page;
  
! 			rdata_crc = comp_crc32(rdata_crc,
! 			                       (char *) bkpb,
! 			                       sizeof(BkpBlock));
  			page = (char *) BufferGetBlock(dtbuf[i]);
  			if (bkpb->hole_length == 0)
  			{
! 				rdata_crc = comp_crc32(rdata_crc,
! 				                       page,
! 				                       BLCKSZ);
  			}
  			else
  			{
  				/* must skip the hole */
! 				rdata_crc = comp_crc32(rdata_crc,
! 				                       page,
! 				                       bkpb->hole_offset);
! 				rdata_crc = comp_crc32(rdata_crc,
! 				                       page + (bkpb->hole_offset + bkpb->hole_length),
! 				                       BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
  			}
  		}
  	}
*************** begin:;
*** 980,987 ****
  	record->xl_rmid = rmid;
  
  	/* Now we can finish computing the record's CRC */
! 	COMP_CRC32(rdata_crc, (char *) record + sizeof(pg_crc32),
! 			   SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(rdata_crc);
  	record->xl_crc = rdata_crc;
  
--- 980,987 ----
  	record->xl_rmid = rmid;
  
  	/* Now we can finish computing the record's CRC */
! 	rdata_crc = comp_crc32(rdata_crc, (char *) record + sizeof(pg_crc32),
! 	                       SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(rdata_crc);
  	record->xl_crc = rdata_crc;
  
*************** RecordIsValid(XLogRecord *record, XLogRe
*** 3550,3556 ****
  
  	/* First the rmgr data */
  	INIT_CRC32(crc);
! 	COMP_CRC32(crc, XLogRecGetData(record), len);
  
  	/* Add in the backup blocks, if any */
  	blk = (char *) XLogRecGetData(record) + len;
--- 3550,3556 ----
  
  	/* First the rmgr data */
  	INIT_CRC32(crc);
! 	crc = comp_crc32(crc, XLogRecGetData(record), len);
  
  	/* Add in the backup blocks, if any */
  	blk = (char *) XLogRecGetData(record) + len;
*************** RecordIsValid(XLogRecord *record, XLogRe
*** 3570,3576 ****
  			return false;
  		}
  		blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
! 		COMP_CRC32(crc, blk, blen);
  		blk += blen;
  	}
  
--- 3570,3576 ----
  			return false;
  		}
  		blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
! 		crc = comp_crc32(crc, blk, blen);
  		blk += blen;
  	}
  
*************** RecordIsValid(XLogRecord *record, XLogRe
*** 3584,3591 ****
  	}
  
  	/* Finally include the record header */
! 	COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
! 			   SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(crc);
  
  	if (!EQ_CRC32(record->xl_crc, crc))
--- 3584,3591 ----
  	}
  
  	/* Finally include the record header */
! 	crc = comp_crc32(crc, (char *) record + sizeof(pg_crc32),
! 	                 SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(crc);
  
  	if (!EQ_CRC32(record->xl_crc, crc))
*************** WriteControlFile(void)
*** 4450,4458 ****
  
  	/* Contents are protected with a CRC */
  	INIT_CRC32(ControlFile->crc);
! 	COMP_CRC32(ControlFile->crc,
! 			   (char *) ControlFile,
! 			   offsetof(ControlFileData, crc));
  	FIN_CRC32(ControlFile->crc);
  
  	/*
--- 4450,4458 ----
  
  	/* Contents are protected with a CRC */
  	INIT_CRC32(ControlFile->crc);
! 	ControlFile->crc = comp_crc32(ControlFile->crc,
! 	                              (char *) ControlFile,
! 	                              offsetof(ControlFileData, crc));
  	FIN_CRC32(ControlFile->crc);
  
  	/*
*************** ReadControlFile(void)
*** 4550,4558 ****
  
  	/* Now check the CRC. */
  	INIT_CRC32(crc);
! 	COMP_CRC32(crc,
! 			   (char *) ControlFile,
! 			   offsetof(ControlFileData, crc));
  	FIN_CRC32(crc);
  
  	if (!EQ_CRC32(crc, ControlFile->crc))
--- 4550,4558 ----
  
  	/* Now check the CRC. */
  	INIT_CRC32(crc);
! 	crc = comp_crc32(crc,
! 	                 (char *) ControlFile,
! 	                 offsetof(ControlFileData, crc));
  	FIN_CRC32(crc);
  
  	if (!EQ_CRC32(crc, ControlFile->crc))
*************** UpdateControlFile(void)
*** 4688,4696 ****
  	int			fd;
  
  	INIT_CRC32(ControlFile->crc);
! 	COMP_CRC32(ControlFile->crc,
! 			   (char *) ControlFile,
! 			   offsetof(ControlFileData, crc));
  	FIN_CRC32(ControlFile->crc);
  
  	fd = BasicOpenFile(XLOG_CONTROL_FILE,
--- 4688,4696 ----
  	int			fd;
  
  	INIT_CRC32(ControlFile->crc);
! 	ControlFile->crc = comp_crc32(ControlFile->crc,
! 	                              (char *) ControlFile,
! 	                              offsetof(ControlFileData, crc));
  	FIN_CRC32(ControlFile->crc);
  
  	fd = BasicOpenFile(XLOG_CONTROL_FILE,
*************** BootStrapXLOG(void)
*** 4900,4908 ****
  	memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
  
  	INIT_CRC32(crc);
! 	COMP_CRC32(crc, &checkPoint, sizeof(checkPoint));
! 	COMP_CRC32(crc, (char *) record + sizeof(pg_crc32),
! 			   SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(crc);
  	record->xl_crc = crc;
  
--- 4900,4908 ----
  	memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
  
  	INIT_CRC32(crc);
! 	crc = comp_crc32(crc, &checkPoint, sizeof(checkPoint));
! 	crc = comp_crc32(crc, (char *) record + sizeof(pg_crc32),
! 	                 SizeOfXLogRecord - sizeof(pg_crc32));
  	FIN_CRC32(crc);
  	record->xl_crc = crc;
  
diff --git a/src/backend/utils/hash/pg_crc.c b/src/backend/utils/hash/pg_crc.c
index 2ef62e5..32ba6f6 100644
*** a/src/backend/utils/hash/pg_crc.c
--- b/src/backend/utils/hash/pg_crc.c
***************
*** 26,32 ****
  
  /* Use c.h so that this file can be built in either frontend or backend */
  #include "c.h"
! 
  
  /*
   * This table is based on the polynomial
--- 26,32 ----
  
  /* Use c.h so that this file can be built in either frontend or backend */
  #include "c.h"
! #include "utils/pg_crc.h"
  
  /*
   * This table is based on the polynomial
*************** const uint32 pg_crc32_table[256] = {
*** 100,105 ****
--- 100,245 ----
  	0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
  };
  
+ /*
+  * 1: pg version
+  * 2: adapted zlib version
+  */
+ #define CRC_VERSION 2
+ #if CRC_VERSION == 1
+ 
+ /* Accumulate some (more) bytes into a CRC */
+ pg_crc32 comp_crc32(pg_crc32 crc, const void *data, uint32 len){
+ 	pg_crc32 start = crc;
+ 	uint32 start_len = len;
+ 	do {
+ 		unsigned char *__data = (unsigned char *) (data);
+ 		while (len-- > 0){
+ 			int __tab_index = ((int) ((crc) >> 24) ^ *__data++) & 0xFF;
+ 			crc = pg_crc32_table[__tab_index] ^ ((crc) << 8);
+ 		}
+ 	} while (0);
+ 
+ 	return crc;
+ }
+ 
+ 
+ #elif CRC_VERSION == 2
+ 
+ static inline uint32 swab32(const uint32 x);
+ static inline uint32 swab32(const uint32 x){
+ 	return ((x & (uint32)0x000000ffUL) << 24) |
+ 		((x & (uint32)0x0000ff00UL) <<  8) |
+ 		((x & (uint32)0x00ff0000UL) >>  8) |
+ 		((x & (uint32)0xff000000UL) >> 24);
+ }
+ 
+ #if defined __BIG_ENDIAN__
+ #define cpu_to_be32(x)
+ #else
+ #define cpu_to_be32(x) swab32(x)
+ #endif
+ 
+ #define unlikely(x)	__builtin_expect(!!(x), 0)
+ #define likely(x) __builtin_expect(!!(x), 1)
+ 
+ static pg_crc32 crc_table[4][256];
+ 
+ static void prepare(){
+ 	/* generate crc for each value followed by one, two, and three zeros*/
+ 	int n;
+ 	int k;
+ 	pg_crc32 c;
+ 
+ 	for (n = 0; n < 256; n++) {
+ 		c = pg_crc32_table[n];
+ 		crc_table[0][n] = c;
+ 	}
+ 
+ 	for (n = 0; n < 256; n++) {
+ 		c = crc_table[0][n];
+ 		for (k = 1; k < 4; k++) {
+ 			//orig:
+ 			//c = crc_table[0][c & 0xff] ^ (c >> 8);
+ 			//pg compliant
+ 			c = crc_table[0][(c >> 24)] ^ (c << 8);
+ 			crc_table[k][n] = c;
+ 		}
+ 	}
+ }
+ 
+ 
+ /* ========================================================================= */
+ #define DOCRC4 d = *buf4++; c ^= cpu_to_be32(d);	  \
+         c = crc_table[0][c & 0xff]         ^ crc_table[1][(c >> 8) & 0xff] ^ \
+             crc_table[2][(c >> 16) & 0xff] ^ crc_table[3][c >> 24]
+ #define DOCRC32 DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4; DOCRC4
+ 
+ /* ========================================================================= */
+ pg_crc32 comp_crc32(pg_crc32 crc, const void *data2, uint32 len)
+ {
+ 	/*
+ 	 * XXX: That table should likely be precomputed at compile time,
+ 	 * but for now I didnt bother.
+ 	 */
+ 	static int initialized = 0;
+ 	if(!initialized){
+ 		prepare();
+ 		initialized = 1;
+ 	}
+ 
+ 	unsigned char *data = (unsigned char *) (data2);
+ 	uint32 c = crc;
+     const uint32 *buf4;
+     uint32 d;
+ 
+     /*
+      * If the input is not on a 4byte boundary, align it. Its dubious
+      * if we actually need that, but the costs of that check should be
+      * marginal.
+      */
+     while (unlikely(len && ((ptrdiff_t)data & 3))) {
+ 	    //printf("align\n");
+ 	    c = crc_table[0][((c >> 24) ^ *data++) & 0xff] ^ (c << 8);
+ 	    len--;
+     }
+ 
+ 
+     buf4 = (const uint32 *)(const void *)data;
+     /*
+      * manually unrolling the loop seems to be faster on most
+      * hardware/compiler combination I tried.
+      */
+     while (likely(len >= 32)) {
+ 	    DOCRC32;
+ 	    len -= 32;
+ 	    //printf("crc: %x after bigstep\n", c);
+     }
+ 
+     /*
+      * There very validly might be input which is not a multiple of 32byte.
+      */
+     while (len >= 4) {
+ 	    DOCRC4;
+ 	    len -= 4;
+ 	    //printf("crc: %x after step2\n", c);
+     }
+ 
+     data = (unsigned char *)buf4;
+     /*
+      * Computed unaligned end of data
+      */
+     if (unlikely(len)){
+ 	    //printf("unalign\n");
+ 	    do {
+ 		    c = crc_table[0][((c >> 24) ^ *data++) & 0xff] ^ (c << 8);
+ 	    }
+ 	    while (--len);
+     }
+     return c;
+ }
+ #undef DOCRC4
+ #undef DOCRC32
+ #endif
  
  #ifdef PROVIDE_64BIT_CRC
  
diff --git a/src/include/utils/pg_crc.h b/src/include/utils/pg_crc.h
index 3e34d62..32de738 100644
*** a/src/include/utils/pg_crc.h
--- b/src/include/utils/pg_crc.h
***************
*** 31,36 ****
--- 31,42 ----
  
  typedef uint32 pg_crc32;
  
+ extern pg_crc32 comp_crc32(pg_crc32 crc, const void *data, uint32 len);
+ 
+ 
+ /* Constant table for CRC calculation */
+ extern CRCDLLIMPORT const uint32 pg_crc32_table[];
+ 
  /* Initialize a CRC accumulator */
  #define INIT_CRC32(crc) ((crc) = 0xFFFFFFFF)
  
*************** do { \
*** 53,61 ****
  /* Check for equality of two CRCs */
  #define EQ_CRC32(c1,c2)  ((c1) == (c2))
  
- /* Constant table for CRC calculation */
- extern CRCDLLIMPORT const uint32 pg_crc32_table[];
- 
  
  #ifdef PROVIDE_64BIT_CRC
  
--- 59,64 ----
-- 
1.6.5.12.gd65df24

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to