Rather than testing single bits in a loop, change AllocSetFreeIndex to
use the __builtin_clz() function to calculate the chunk index.

This requires a new check for __builtin_clz in the configure script.

Results in a ~2% performance increase on sysbench on PowerPC.

Signed-off-by: Jeremy Kerr <j...@ozlabs.org>

---
v4: don't use separate fls() function, remove hardcoded 32

---
 configure.in                  |   13 +++++++++++++
 src/backend/utils/mmgr/aset.c |   14 +++++++++++++-
 2 files changed, 26 insertions(+), 1 deletion(-)

*** a/configure.in
--- b/configure.in
***************
*** 1361,1366 **** case $host_os in
--- 1361,1379 ----
                AC_FUNC_FSEEKO;;
  esac
  
+ # GCC builtins
+ #
+ # We need AC_TRY_LINK here, as the prototype generated by AC_CHECK_FUNC
+ # will cause gcc to try to reference a non-builtin symbol.
+ 
+ AC_MSG_CHECKING([for __builtin_clz])
+ AC_TRY_LINK([],
+           [__builtin_clz(0);],
+           [AC_DEFINE(HAVE_BUILTIN_CLZ, 1,
+                               [Define to 1 if you have __builtin_clz().])
+                       AC_MSG_RESULT(yes)],
+               [AC_MSG_RESULT(no)])
+ 
  
  #
  # Pthreads
*** a/src/backend/utils/mmgr/aset.c
--- b/src/backend/utils/mmgr/aset.c
***************
*** 268,281 **** AllocSetFreeIndex(Size size)
  {
        int                     idx = 0;
  
!       if (size > 0)
        {
                size = (size - 1) >> ALLOC_MINBITS;
                while (size != 0)
                {
                        idx++;
                        size >>= 1;
                }
                Assert(idx < ALLOCSET_NUM_FREELISTS);
        }
  
--- 268,293 ----
  {
        int                     idx = 0;
  
!       if (size > (1 << ALLOC_MINBITS))
        {
                size = (size - 1) >> ALLOC_MINBITS;
+ 
+ #ifdef HAVE_BUILTIN_CLZ
+               /* If possible, use __builtin_clz to calculate the chunk index 
- this
+                * should be O(1) rather than O(n). The builtin takes an 
unsigned int,
+                * so we need to cast from the possibly 64-bit Size type. This 
cast
+                * won't overflow, as the limit is at 2^(32 + ALLOC_MINBITS) 
bytes,
+                * which is larger than ALLOC_CHUNK_LIMIT.
+                */
+               idx = sizeof(unsigned int) * BITS_PER_BYTE -
+                               __builtin_clz((unsigned int)size);
+ #else
                while (size != 0)
                {
                        idx++;
                        size >>= 1;
                }
+ #endif
                Assert(idx < ALLOCSET_NUM_FREELISTS);
        }
  

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to