---
 configure.ac                                       |   1 +
 scons/gallium.py                                   |   2 +
 src/gallium/auxiliary/tgsi/tgsi_exec.c             |   1 +
 src/gallium/auxiliary/tgsi/tgsi_scan.c             |   2 +-
 src/gallium/auxiliary/util/u_helpers.c             |   1 +
 src/gallium/auxiliary/util/u_math.h                | 118 -------------
 src/gallium/auxiliary/util/u_vbuf.c                |   1 +
 src/gallium/drivers/i915/i915_state_emit.c         |   1 +
 src/gallium/drivers/ilo/ilo_shader.c               |   1 +
 src/gallium/drivers/ilo/ilo_state.c                |   1 +
 src/gallium/drivers/llvmpipe/lp_rast_tri.c         |   1 +
 src/gallium/drivers/llvmpipe/lp_setup_tri.c        |   1 +
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |   5 +-
 .../drivers/nouveau/codegen/nv50_ir_util.cpp       |   1 +
 src/gallium/drivers/nouveau/nv50/nv50_screen.c     |   1 +
 .../drivers/nouveau/nv50/nv50_shader_state.c       |   1 +
 src/gallium/drivers/r600/evergreen_compute.c       |   1 +
 src/gallium/drivers/r600/r600_blit.c               |   1 +
 src/gallium/drivers/r600/r600_state_common.c       |   1 +
 src/gallium/drivers/radeon/r600_streamout.c        |   1 +
 src/gallium/drivers/radeonsi/si_descriptors.c      |   1 +
 src/gallium/drivers/radeonsi/si_state_draw.c       |   3 +-
 src/gallium/drivers/softpipe/sp_quad_fs.c          |   1 +
 src/gallium/state_trackers/clover/api/memory.cpp   |   1 +
 src/gallium/state_trackers/glx/xlib/glx_api.c      |   6 +-
 src/gallium/state_trackers/glx/xlib/xm_api.c       |  10 +-
 src/mesa/drivers/common/meta.c                     |   3 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp       |   4 +-
 src/mesa/drivers/dri/i965/brw_curbe.c              |   2 +-
 src/mesa/drivers/dri/i965/brw_draw.c               |   6 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp               |  12 +-
 src/mesa/drivers/dri/i965/brw_shader.cpp           |   2 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp             |   2 +-
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp  |   2 +-
 src/mesa/drivers/dri/i965/brw_wm.c                 |   4 +-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c   |   2 +-
 src/mesa/drivers/x11/fakeglx.c                     |   6 +-
 src/mesa/drivers/x11/xm_api.c                      |  16 +-
 src/mesa/main/bitset.h                             |   1 +
 src/mesa/main/buffers.c                            |   6 +-
 src/mesa/main/imports.c                            |  88 ---------
 src/mesa/main/imports.h                            |  54 +-----
 src/mesa/program/program_parse.y                   |   2 +-
 src/util/bitcount.h                                | 196 +++++++++++++++++++++
 44 files changed, 266 insertions(+), 307 deletions(-)
 create mode 100644 src/util/bitcount.h

diff --git a/configure.ac b/configure.ac
index 03f1bca..be673da 100644
--- a/configure.ac
+++ b/configure.ac
@@ -130,6 +130,7 @@ fi
 dnl Check for compiler builtins
 AX_GCC_BUILTIN([__builtin_bswap32])
 AX_GCC_BUILTIN([__builtin_bswap64])
+AX_GCC_BUILTIN([__builtin_clrsb])
 AX_GCC_BUILTIN([__builtin_clz])
 AX_GCC_BUILTIN([__builtin_clzll])
 AX_GCC_BUILTIN([__builtin_ctz])
diff --git a/scons/gallium.py b/scons/gallium.py
index dd5ca56..2eb6e91 100755
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -606,6 +606,8 @@ def generate(env):
             ]
         if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.5'):
             cppdefines += ['HAVE___BUILTIN_UNREACHABLE']
+        if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.7'):
+            cppdefines += ['HAVE___BUILTIN_CLRSB']
 
     # Load tools
     env.Tool('lex')
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 7794801..d5830b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -60,6 +60,7 @@
 #include "tgsi_exec.h"
 #include "util/u_memory.h"
 #include "util/u_math.h"
+#include "util/bitcount.h"
 
 
 #define DEBUG_EXECUTION 0
diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
index 42bc61e..b87a7b0 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
@@ -41,7 +41,7 @@
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 #include "tgsi/tgsi_scan.h"
-
+#include "util/bitcount.h"
 
 
 
diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index ac1edcd..f8df4b9 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -27,6 +27,7 @@
 
 #include "util/u_helpers.h"
 #include "util/u_inlines.h"
+#include "util/bitcount.h"
 
 /**
  * This function is used to copy an array of pipe_vertex_buffer structures,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 0113fb1..6004e96 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -52,10 +52,6 @@ extern "C" {
 #include <float.h>
 #include <stdarg.h>
 
-#ifdef PIPE_OS_UNIX
-#include <strings.h> /* for ffs */
-#endif
-
 
 #ifndef M_SQRT2
 #define M_SQRT2 1.41421356237309504880
@@ -492,85 +488,6 @@ util_half_inf_sign(int16_t x)
    return (x < 0) ? -1 : 1;
 }
 
-
-/**
- * Find first bit set in word.  Least significant bit is 1.
- * Return 0 if no bits set.
- */
-#ifndef FFS_DEFINED
-#define FFS_DEFINED 1
-
-#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64)
-unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
-#pragma intrinsic(_BitScanForward)
-static INLINE
-unsigned long ffs( unsigned long u )
-{
-   unsigned long i;
-   if (_BitScanForward(&i, u))
-      return i + 1;
-   else
-      return 0;
-}
-#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
-static INLINE
-unsigned ffs( unsigned u )
-{
-   unsigned i;
-
-   if (u == 0) {
-      return 0;
-   }
-
-   __asm bsf eax, [u]
-   __asm inc eax
-   __asm mov [i], eax
-
-   return i;
-}
-#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID)
-#define ffs __builtin_ffs
-#endif
-
-#endif /* FFS_DEFINED */
-
-/**
- * Find last bit set in a word.  The least significant bit is 1.
- * Return 0 if no bits are set.
- */
-static INLINE unsigned
-util_last_bit(unsigned u)
-{
-#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304)
-   return u == 0 ? 0 : 32 - __builtin_clz(u);
-#else
-   unsigned r = 0;
-   while (u) {
-       r++;
-       u >>= 1;
-   }
-   return r;
-#endif
-}
-
-/**
- * Find last bit in a word that does not match the sign bit. The least
- * significant bit is 1.
- * Return 0 if no bits are set.
- */
-static INLINE unsigned
-util_last_bit_signed(int i)
-{
-#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407) && !defined(__INTEL_COMPILER)
-   return 31 - __builtin_clrsb(i);
-#else
-   if (i >= 0)
-      return util_last_bit(i);
-   else
-      return util_last_bit(~(unsigned)i);
-#endif
-}
-
 /* Destructively loop over all of the bits in a mask as in:
  *
  * while (mymask) {
@@ -705,41 +622,6 @@ util_next_power_of_two(unsigned x)
 
 
 /**
- * Return number of bits set in n.
- */
-static INLINE unsigned
-util_bitcount(unsigned n)
-{
-#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 304)
-   return __builtin_popcount(n);
-#else
-   /* K&R classic bitcount.
-    *
-    * For each iteration, clear the LSB from the bitfield.
-    * Requires only one iteration per set bit, instead of
-    * one iteration per bit less than highest set bit.
-    */
-   unsigned bits = 0;
-   for (bits; n; bits++) {
-      n &= n - 1;
-   }
-   return bits;
-#endif
-}
-
-
-static INLINE unsigned
-util_bitcount64(uint64_t n)
-{
-#ifdef HAVE___BUILTIN_POPCOUNTLL
-   return __builtin_popcountll(n);
-#else
-   return util_bitcount(n) + util_bitcount(n >> 32);
-#endif
-}
-
-
-/**
  * Reverse bits in n
  * Algorithm taken from:
  * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer
diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c
index b1b89bf..2a3b059 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -96,6 +96,7 @@
 #include "translate/translate_cache.h"
 #include "cso_cache/cso_cache.h"
 #include "cso_cache/cso_hash.h"
+#include "util/bitcount.h"
 
 struct u_vbuf_elements {
    unsigned count;
diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c
index 6244f48..0503dbc 100644
--- a/src/gallium/drivers/i915/i915_state_emit.c
+++ b/src/gallium/drivers/i915/i915_state_emit.c
@@ -40,6 +40,7 @@
 #include "util/u_format.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/bitcount.h"
 
 struct i915_tracked_hw_state {
    const char *name;
diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c
index f4203aa..0bbeb74 100644
--- a/src/gallium/drivers/ilo/ilo_shader.c
+++ b/src/gallium/drivers/ilo/ilo_shader.c
@@ -28,6 +28,7 @@
 #include "genhw/genhw.h" /* for SBE setup */
 #include "tgsi/tgsi_parse.h"
 #include "intel_winsys.h"
+#include "util/bitcount.h"
 
 #include "shader/ilo_shader_internal.h"
 #include "ilo_builder.h"
diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c
index 18c1566..89c489e 100644
--- a/src/gallium/drivers/ilo/ilo_state.c
+++ b/src/gallium/drivers/ilo/ilo_state.c
@@ -27,6 +27,7 @@
 
 #include "util/u_helpers.h"
 #include "util/u_upload_mgr.h"
+#include "util/bitcount.h"
 
 #include "ilo_context.h"
 #include "ilo_resource.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
index 41f6fbf..03766ff 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c
@@ -31,6 +31,7 @@
 
 #include <limits.h>
 #include "util/u_math.h"
+#include "util/bitcount.h"
 #include "lp_debug.h"
 #include "lp_perf.h"
 #include "lp_rast_priv.h"
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 900df71..1d1d1e2 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -33,6 +33,7 @@
 #include "util/u_memory.h"
 #include "util/u_rect.h"
 #include "util/u_sse.h"
+#include "util/bitcount.h"
 #include "lp_perf.h"
 #include "lp_setup_context.h"
 #include "lp_rast.h"
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 719f980..e4d4126 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -23,10 +23,7 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
 #include "codegen/nv50_ir_build_util.h"
-
-extern "C" {
-#include "util/u_math.h"
-}
+#include "util/bitcount.h"
 
 namespace nv50_ir {
 
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
index d26acb3..d6cef37 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp
@@ -21,6 +21,7 @@
  */
 
 #include "codegen/nv50_ir_util.h"
+#include "util/bitcount.h"
 
 namespace nv50_ir {
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 825e0ba..e1a302f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -26,6 +26,7 @@
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "pipe/p_screen.h"
+#include "util/bitcount.h"
 
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_screen.h"
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
index c698782..9457661 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -25,6 +25,7 @@
 #include "pipe/p_defines.h"
 #include "pipe/p_state.h"
 #include "util/u_inlines.h"
+#include "util/bitcount.h"
 
 #include "nv50/nv50_context.h"
 
diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c
index 38b78c7..71d43d8 100644
--- a/src/gallium/drivers/r600/evergreen_compute.c
+++ b/src/gallium/drivers/r600/evergreen_compute.c
@@ -37,6 +37,7 @@
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_framebuffer.h"
+#include "util/bitcount.h"
 #include "pipebuffer/pb_buffer.h"
 #include "evergreend.h"
 #include "r600_shader.h"
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index bdc5f9f..b28826e 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -25,6 +25,7 @@
 #include "evergreen_compute.h"
 #include "util/u_surface.h"
 #include "util/u_format.h"
+#include "util/bitcount.h"
 #include "evergreend.h"
 
 enum r600_blitter_op /* bitmask */
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index c3f21cb..793b29c 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -33,6 +33,7 @@
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_math.h"
+#include "util/bitcount.h"
 #include "tgsi/tgsi_parse.h"
 
 void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw)
diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c
index e2413c2..22d3279 100644
--- a/src/gallium/drivers/radeon/r600_streamout.c
+++ b/src/gallium/drivers/radeon/r600_streamout.c
@@ -28,6 +28,7 @@
 #include "r600_cs.h"
 
 #include "util/u_memory.h"
+#include "util/bitcount.h"
 
 static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable);
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 9227034..4bcb248 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -41,6 +41,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_upload_mgr.h"
+#include "util/bitcount.h"
 
 #define SI_NUM_CONTEXTS 16
 
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index 708e42a..5db9053 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -34,6 +34,7 @@
 #include "util/u_memory.h"
 #include "util/u_prim.h"
 #include "util/u_upload_mgr.h"
+#include "util/bitcount.h"
 
 /*
  * Shaders
@@ -115,7 +116,7 @@ static void si_shader_gs(struct si_shader *shader)
        si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
 
        si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
-                      util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2));
+                      util_bitcount_64(shader->selector->gs_used_inputs) * (16 >> 2));
        si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
 
        si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c
index 82c58d0..168b77b 100644
--- a/src/gallium/drivers/softpipe/sp_quad_fs.c
+++ b/src/gallium/drivers/softpipe/sp_quad_fs.c
@@ -40,6 +40,7 @@
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_shader_tokens.h"
+#include "util/bitcount.h"
 
 #include "sp_context.h"
 #include "sp_state.h"
diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp
index a094e74..79159f0 100644
--- a/src/gallium/state_trackers/clover/api/memory.cpp
+++ b/src/gallium/state_trackers/clover/api/memory.cpp
@@ -24,6 +24,7 @@
 #include "api/util.hpp"
 #include "core/memory.hpp"
 #include "core/format.hpp"
+#include "util/bitcount.h"
 
 using namespace clover;
 
diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c
index 976791b..9914116 100644
--- a/src/gallium/state_trackers/glx/xlib/glx_api.c
+++ b/src/gallium/state_trackers/glx/xlib/glx_api.c
@@ -402,9 +402,9 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass )
     * 10 bits per color channel.  Mesa's limited to a max of 8 bits/channel.
     */
    if (vis && depth > 24 && (xclass==TrueColor || xclass==DirectColor)) {
-      if (_mesa_bitcount((GLuint) vis->red_mask  ) <= 8 &&
-          _mesa_bitcount((GLuint) vis->green_mask) <= 8 &&
-          _mesa_bitcount((GLuint) vis->blue_mask ) <= 8) {
+      if (util_bitcount((GLuint) vis->red_mask  ) <= 8 &&
+          util_bitcount((GLuint) vis->green_mask) <= 8 &&
+          util_bitcount((GLuint) vis->blue_mask ) <= 8) {
          return vis;
       }
       else {
diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c
index 1b77729..74c5637 100644
--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -736,9 +736,9 @@ XMesaVisual XMesaCreateVisual( Display *display,
    {
       const int xclass = v->visualType;
       if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
-         red_bits   = _mesa_bitcount(GET_REDMASK(v));
-         green_bits = _mesa_bitcount(GET_GREENMASK(v));
-         blue_bits  = _mesa_bitcount(GET_BLUEMASK(v));
+         red_bits   = util_bitcount(GET_REDMASK(v));
+         green_bits = util_bitcount(GET_GREENMASK(v));
+         blue_bits  = util_bitcount(GET_BLUEMASK(v));
       }
       else {
          /* this is an approximation */
@@ -1067,8 +1067,8 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p,
       if (ctx->Extensions.ARB_texture_non_power_of_two) {
          target = GLX_TEXTURE_2D_EXT;
       }
-      else if (   _mesa_bitcount(b->width)  == 1
-               && _mesa_bitcount(b->height) == 1) {
+      else if (   util_bitcount(b->width)  == 1
+               && util_bitcount(b->height) == 1) {
          /* power of two size */
          if (b->height == 1) {
             target = GLX_TEXTURE_1D_EXT;
diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c
index 87532c1..22a5b3e 100644
--- a/src/mesa/drivers/common/meta.c
+++ b/src/mesa/drivers/common/meta.c
@@ -85,6 +85,7 @@
 #include "main/enums.h"
 #include "main/glformats.h"
 #include "util/ralloc.h"
+#include "util/bitcount.h"
 
 /** Return offset in bytes of the field within a vertex struct */
 #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
@@ -1640,7 +1641,7 @@ _mesa_meta_drawbuffers_from_bitfield(GLbitfield bits)
    assert((bits & ~BUFFER_BITS_COLOR) == 0);
 
    /* Make sure we don't overflow any arrays. */
-   assert(_mesa_bitcount(bits) <= MAX_DRAW_BUFFERS);
+   assert(util_bitcount(bits) <= MAX_DRAW_BUFFERS);
 
    enums[0] = GL_NONE;
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 844f5e4..7ccdff5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1346,7 +1346,7 @@ inline int count_trailing_one_bits(unsigned value)
 #ifdef HAVE___BUILTIN_CTZ
    return __builtin_ctz(~value);
 #else
-   return _mesa_bitcount(value & ~(value + 1));
+   return util_bitcount(value & ~(value + 1));
 #endif
 }
 
@@ -1388,7 +1388,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples)
     */
    unsigned stack_depth = 0;
    for (unsigned i = 0; i < num_samples; ++i) {
-      assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
+      assert(stack_depth == util_bitcount(i)); /* Loop invariant */
 
       /* Push sample i onto the stack */
       assert(stack_depth < ARRAY_SIZE(texture_data));
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index 1a828ed..46a3ccb 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -85,7 +85,7 @@ static void calculate_curbe_offsets( struct brw_context *brw )
 
    /* _NEW_TRANSFORM */
    if (ctx->Transform.ClipPlanesEnabled) {
-      GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
+      GLuint nr_planes = 6 + util_bitcount_64(ctx->Transform.ClipPlanesEnabled);
       nr_clip_regs = (nr_planes * 4 + 15) / 16;
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index b28eaf2..687ab42 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -369,11 +369,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx,
     * index.
     */
    brw->wm.base.sampler_count =
-      _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed);
+      util_last_bit(ctx->FragmentProgram._Current->Base.SamplersUsed);
    brw->gs.base.sampler_count = ctx->GeometryProgram._Current ?
-      _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
+      util_last_bit(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0;
    brw->vs.base.sampler_count =
-      _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed);
+      util_last_bit(ctx->VertexProgram._Current->Base.SamplersUsed);
 
    /* We have to validate the textures *before* checking for fallbacks;
     * otherwise, the software fallback won't be able to rely on the
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index aa1d8d2..700c087 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1719,8 +1719,8 @@ fs_visitor::calculate_urb_setup()
    int urb_next = 0;
    /* Figure out where each of the incoming setup attributes lands. */
    if (brw->gen >= 6) {
-      if (_mesa_bitcount_64(prog->InputsRead &
-                            BRW_FS_VARYING_INPUT_MASK) <= 16) {
+      if (util_bitcount_64(prog->InputsRead &
+                           BRW_FS_VARYING_INPUT_MASK) <= 16) {
          /* The SF/SBE pipeline stage can do arbitrary rearrangement of the
           * first 16 varying inputs, so we can put them wherever we want.
           * Just put them in order.
@@ -3791,11 +3791,11 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
       key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
    }
 
-   if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead &
-                                         BRW_FS_VARYING_INPUT_MASK) > 16)
+   if (brw->gen < 6 || util_bitcount_64(fp->Base.InputsRead &
+                                        BRW_FS_VARYING_INPUT_MASK) > 16)
       key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS;
 
-   unsigned sampler_count = _mesa_fls(fp->Base.SamplersUsed);
+   unsigned sampler_count = util_last_bit(fp->Base.SamplersUsed);
    for (unsigned i = 0; i < sampler_count; i++) {
       if (fp->Base.ShadowSamplers & (1 << i)) {
          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
@@ -3811,7 +3811,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog)
       key.drawable_height = ctx->DrawBuffer->Height;
    }
 
-   key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten &
+   key.nr_color_regions = util_bitcount_64(fp->Base.OutputsWritten &
          ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) |
          BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)));
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 21dcf2d..62cecb1 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -864,7 +864,7 @@ backend_visitor::invalidate_cfg()
 void
 backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
 {
-   int num_textures = _mesa_fls(prog->SamplersUsed);
+   int num_textures = util_last_bit(prog->SamplersUsed);
 
    stage_prog_data->binding_table.texture_start = next_binding_table_offset;
    next_binding_table_offset += num_textures;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index df589b8..67af265 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1918,7 +1918,7 @@ brw_vec4_setup_prog_key_for_precompile(struct gl_context *ctx,
    key->program_string_id = id;
    key->clamp_vertex_color = ctx->API == API_OPENGL_COMPAT;
 
-   unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
+   unsigned sampler_count = util_last_bit(prog->SamplersUsed);
    for (unsigned i = 0; i < sampler_count; i++) {
       if (prog->ShadowSamplers & (1 << i)) {
          /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
index db0e6cc..d876bf5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp
@@ -368,7 +368,7 @@ vec4_gs_visitor::emit_control_data_bits()
          src_reg prev_count(this, glsl_type::uint_type);
          emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu));
          unsigned log2_bits_per_vertex =
-            _mesa_fls(c->control_data_bits_per_vertex);
+            util_last_bit(c->control_data_bits_per_vertex);
          emit(SHR(dst_reg(dword_index), prev_count,
                   (uint32_t) (6 - log2_bits_per_vertex)));
       }
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 5863573..5b692ee 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -539,8 +539,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
       (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
 
    /* BRW_NEW_VUE_MAP_GEOM_OUT */
-   if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
-                                         BRW_FS_VARYING_INPUT_MASK) > 16)
+   if (brw->gen < 6 || util_bitcount_64(fp->program.Base.InputsRead &
+                                        BRW_FS_VARYING_INPUT_MASK) > 16)
       key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index ef46dd7..9436bd2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -769,7 +769,7 @@ update_stage_texture_surfaces(struct brw_context *brw,
    else
       surf_offset += stage_state->prog_data->binding_table.texture_start;
 
-   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
+   unsigned num_samplers = util_last_bit(prog->SamplersUsed);
    for (unsigned s = 0; s < num_samplers; s++) {
       surf_offset[s] = 0;
 
diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c
index ee05f8a..b1b470a 100644
--- a/src/mesa/drivers/x11/fakeglx.c
+++ b/src/mesa/drivers/x11/fakeglx.c
@@ -528,9 +528,9 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass )
     * 10 bits per color channel.  Mesa's limited to a max of 8 bits/channel.
     */
    if (vis && depth > 24 && (xclass==TrueColor || xclass==DirectColor)) {
-      if (_mesa_bitcount((GLuint) vis->red_mask  ) <= 8 &&
-          _mesa_bitcount((GLuint) vis->green_mask) <= 8 &&
-          _mesa_bitcount((GLuint) vis->blue_mask ) <= 8) {
+      if (util_bitcount((GLuint) vis->red_mask  ) <= 8 &&
+          util_bitcount((GLuint) vis->green_mask) <= 8 &&
+          util_bitcount((GLuint) vis->blue_mask ) <= 8) {
          return vis;
       }
       else {
diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c
index 2d66dbd..c85b9f6 100644
--- a/src/mesa/drivers/x11/xm_api.c
+++ b/src/mesa/drivers/x11/xm_api.c
@@ -462,9 +462,9 @@ setup_truecolor(XMesaVisual v, XMesaBuffer buffer, XMesaColormap cmap)
           3*16, 11*16,  1*16,  9*16,
          15*16,  7*16, 13*16,  5*16,
       };
-      GLint rBits = _mesa_bitcount(rmask);
-      GLint gBits = _mesa_bitcount(gmask);
-      GLint bBits = _mesa_bitcount(bmask);
+      GLint rBits = util_bitcount(rmask);
+      GLint gBits = util_bitcount(gmask);
+      GLint bBits = util_bitcount(bmask);
       GLint maxBits;
       GLuint i;
 
@@ -827,9 +827,9 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display,
    {
       const int xclass = v->visualType;
       if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) {
-         red_bits   = _mesa_bitcount(GET_REDMASK(v));
-         green_bits = _mesa_bitcount(GET_GREENMASK(v));
-         blue_bits  = _mesa_bitcount(GET_BLUEMASK(v));
+         red_bits   = util_bitcount(GET_REDMASK(v));
+         green_bits = util_bitcount(GET_GREENMASK(v));
+         blue_bits  = util_bitcount(GET_BLUEMASK(v));
       }
       else {
          /* this is an approximation */
@@ -1091,8 +1091,8 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p,
       if (ctx->Extensions.ARB_texture_non_power_of_two) {
          target = GLX_TEXTURE_2D_EXT;
       }
-      else if (   _mesa_bitcount(width)  == 1
-               && _mesa_bitcount(height) == 1) {
+      else if (   util_bitcount(width)  == 1
+               && util_bitcount(height) == 1) {
          /* power of two size */
          if (height == 1) {
             target = GLX_TEXTURE_1D_EXT;
diff --git a/src/mesa/main/bitset.h b/src/mesa/main/bitset.h
index 601fd0e..f50b14f 100644
--- a/src/mesa/main/bitset.h
+++ b/src/mesa/main/bitset.h
@@ -32,6 +32,7 @@
 #define BITSET_H
 
 #include "imports.h"
+#include "util/bitcount.h"
 
 /****************************************************************************
  * generic bitset implementation
diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index 1ee2009..8d50447 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -374,7 +374,7 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers)
           *  Previous versions of the OpenGL specification say INVALID_OPERATION,
           *  but the Khronos conformance tests expect INVALID_ENUM.
           */
-         if (_mesa_bitcount(destMask[output]) > 1) {
+         if (util_bitcount(destMask[output]) > 1) {
             _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)");
             return;
          }
@@ -497,7 +497,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers,
     * (ex: glDrawBuffer(GL_FRONT_AND_BACK)).
     * Otherwise, destMask[x] can only have one bit set.
     */
-   if (n > 0 && _mesa_bitcount(destMask[0]) > 1) {
+   if (n > 0 && util_bitcount(destMask[0]) > 1) {
       GLuint count = 0, destMask0 = destMask[0];
       while (destMask0) {
          GLint bufIndex = ffs(destMask0) - 1;
@@ -517,7 +517,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers,
          if (destMask[buf]) {
             GLint bufIndex = ffs(destMask[buf]) - 1;
             /* only one bit should be set in the destMask[buf] field */
-            ASSERT(_mesa_bitcount(destMask[buf]) == 1);
+            ASSERT(util_bitcount(destMask[buf]) == 1);
             if (fb->_ColorDrawBufferIndexes[buf] != bufIndex) {
               updated_drawbuffers(ctx);
                fb->_ColorDrawBufferIndexes[buf] = bufIndex;
diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c
index b8c7548..ac4b698 100644
--- a/src/mesa/main/imports.c
+++ b/src/mesa/main/imports.c
@@ -217,94 +217,6 @@ _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize,
 /*@{*/
 
 
-#ifndef HAVE___BUILTIN_FFS
-/**
- * Find the first bit set in a word.
- */
-int
-ffs(int i)
-{
-   register int bit = 0;
-   if (i != 0) {
-      if ((i & 0xffff) == 0) {
-         bit += 16;
-         i >>= 16;
-      }
-      if ((i & 0xff) == 0) {
-         bit += 8;
-         i >>= 8;
-      }
-      if ((i & 0xf) == 0) {
-         bit += 4;
-         i >>= 4;
-      }
-      while ((i & 1) == 0) {
-         bit++;
-         i >>= 1;
-      }
-      bit++;
-   }
-   return bit;
-}
-#endif
-
-#ifndef HAVE___BUILTIN_FFSLL
-/**
- * Find position of first bit set in given value.
- * XXX Warning: this function can only be used on 64-bit systems!
- * \return  position of least-significant bit set, starting at 1, return zero
- *          if no bits set.
- */
-int
-ffsll(long long int val)
-{
-   int bit;
-
-   assert(sizeof(val) == 8);
-
-   bit = ffs((int) val);
-   if (bit != 0)
-      return bit;
-
-   bit = ffs((int) (val >> 32));
-   if (bit != 0)
-      return 32 + bit;
-
-   return 0;
-}
-#endif
-
-
-#ifndef HAVE___BUILTIN_POPCOUNT
-/**
- * Return number of bits set in given GLuint.
- */
-unsigned int
-_mesa_bitcount(unsigned int n)
-{
-   unsigned int bits;
-   for (bits = 0; n > 0; n = n >> 1) {
-      bits += (n & 1);
-   }
-   return bits;
-}
-#endif
-
-#ifndef HAVE___BUILTIN_POPCOUNTLL
-/**
- * Return number of bits set in given 64-bit uint.
- */
-unsigned int
-_mesa_bitcount_64(uint64_t n)
-{
-   unsigned int bits;
-   for (bits = 0; n > 0; n = n >> 1) {
-      bits += (n & 1);
-   }
-   return bits;
-}
-#endif
-
 
 /* Using C99 rounding functions for roundToEven() implementation is
  * difficult, because round(), rint, and nearbyint() are affected by
diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h
index 436d165..0b03816 100644
--- a/src/mesa/main/imports.h
+++ b/src/mesa/main/imports.h
@@ -39,6 +39,7 @@
 #include "compiler.h"
 #include "glheader.h"
 #include "errors.h"
+#include "util/bitcount.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -469,59 +470,6 @@ extern void
 _mesa_exec_free( void *addr );
 
 
-#ifndef FFS_DEFINED
-#define FFS_DEFINED 1
-#ifdef HAVE___BUILTIN_FFS
-#define ffs __builtin_ffs
-#else
-extern int ffs(int i);
-#endif
-
-#ifdef HAVE___BUILTIN_FFSLL
-#define ffsll __builtin_ffsll
-#else
-extern int ffsll(long long int i);
-#endif
-#endif /* FFS_DEFINED */
-
-
-#ifdef HAVE___BUILTIN_POPCOUNT
-#define _mesa_bitcount(i) __builtin_popcount(i)
-#else
-extern unsigned int
-_mesa_bitcount(unsigned int n);
-#endif
-
-#ifdef HAVE___BUILTIN_POPCOUNTLL
-#define _mesa_bitcount_64(i) __builtin_popcountll(i)
-#else
-extern unsigned int
-_mesa_bitcount_64(uint64_t n);
-#endif
-
-/**
- * Find the last (most significant) bit set in a word.
- *
- * Essentially ffs() in the reverse direction.
- */
-static inline unsigned int
-_mesa_fls(unsigned int n)
-{
-#ifdef HAVE___BUILTIN_CLZ
-   return n == 0 ? 0 : 32 - __builtin_clz(n);
-#else
-   unsigned int v = 1;
-
-   if (n == 0)
-      return 0;
-
-   while (n >>= 1)
-       v++;
-
-   return v;
-#endif
-}
-
 extern int
 _mesa_round_to_even(float val);
 
diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y
index 1664740..e79c944 100644
--- a/src/mesa/program/program_parse.y
+++ b/src/mesa/program/program_parse.y
@@ -2771,7 +2771,7 @@ _mesa_parse_arb_program(struct gl_context *ctx, GLenum target, const GLubyte *st
    state->prog->NumInstructions++;
 
    state->prog->NumParameters = state->prog->Parameters->NumParameters;
-   state->prog->NumAttributes = _mesa_bitcount_64(state->prog->InputsRead);
+   state->prog->NumAttributes = util_bitcount_64(state->prog->InputsRead);
 
    /*
     * Initialize native counts to logical counts.  The device driver may
diff --git a/src/util/bitcount.h b/src/util/bitcount.h
new file mode 100644
index 0000000..87d92d5
--- /dev/null
+++ b/src/util/bitcount.h
@@ -0,0 +1,196 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 1999-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Various utilities for counting bits.
+ */
+
+#ifndef UTIL_BITCOUNT_H
+#define UTIL_BITCOUNT_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+#ifndef _MSC_VER
+#include <strings.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Find first bit set in word.  Least significant bit is 1.
+ * Return 0 if no bits set.
+ */
+#if _SVID_SOURCE || _BSD_SOURCE || _POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700
+/* We get ffs for free */
+#elif defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64)
+unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
+#pragma intrinsic(_BitScanForward)
+/* ffs() on top of the bit-scan intrinsic; a found index is turned 1-based. */
+static inline unsigned long
+ffs(unsigned long u)
+{
+   unsigned long index;
+   if (!_BitScanForward(&index, u))
+      return 0;
+   return index + 1;
+}
+#elif defined(HAVE___BUILTIN_FFS)
+#  define ffs __builtin_ffs
+#else
+/* Portable fallback: skip empty low 16/8/4-bit groups, then scan bit by bit. */
+static inline int
+ffs(int i)
+{
+   int pos = 1;   /* positions are 1-based */
+
+   if (i == 0)
+      return 0;
+
+   if ((i & 0xffff) == 0) {
+      pos += 16;
+      i >>= 16;
+   }
+   if ((i & 0xff) == 0) {
+      pos += 8;
+      i >>= 8;
+   }
+   if ((i & 0xf) == 0) {
+      pos += 4;
+      i >>= 4;
+   }
+   for (; (i & 1) == 0; i >>= 1)
+      pos++;
+   return pos;
+}
+#endif
+
+
+#if defined(_GNU_SOURCE)
+/* We get ffsll for free */
+#elif defined(HAVE___BUILTIN_FFSLL)
+#  define ffsll __builtin_ffsll
+#else
+/* Portable fallback: search the low 32 bits, then the high 32 bits. */
+static inline int
+ffsll(long long int val)
+{
+   int pos;
+
+   assert(sizeof(val) == 8);
+
+   pos = ffs((int) val);
+   if (pos == 0) {
+      /* Nothing in the low word; positions in the high word start at 33. */
+      pos = ffs((int) (val >> 32));
+      if (pos != 0)
+         pos += 32;
+   }
+   return pos;
+}
+#endif
+
+
+/**
+ * Return the 1-based position of the most significant set bit of \p n,
+ * or 0 when no bit is set.  Effectively ffs() searching from the top
+ * instead of the bottom.
+ */
+static inline unsigned
+util_last_bit(unsigned n)
+{
+#ifdef HAVE___BUILTIN_CLZ
+   if (n == 0)
+      return 0;   /* __builtin_clz(0) is undefined */
+   return 32 - __builtin_clz(n);
+#else
+   unsigned pos = 0;
+
+   /* Shift the value out one bit at a time, counting the steps. */
+   for (; n != 0; n >>= 1)
+      pos++;
+
+   return pos;
+#endif
+}
+
+
+/**
+ * Find the last (most significant) bit that differs from the sign bit.
+ * The least significant bit is 1; returns 0 for 0 and -1, where every
+ * bit equals the sign bit.
+ */
+static inline unsigned
+util_last_bit_signed(int i)
+{
+#ifdef HAVE___BUILTIN_CLRSB
+   return 31 - __builtin_clrsb(i);
+#else
+   if (i >= 0)
+      return util_last_bit(i);
+   /* Complement a negative value so its redundant sign bits read as 0. */
+   return util_last_bit(~(unsigned)i);
+#endif
+}
+
+
+/** Return the number of bits set in \p n. */
+static inline unsigned
+util_bitcount(unsigned n)
+{
+#ifdef HAVE___BUILTIN_POPCOUNT
+   return __builtin_popcount(n);
+#else
+   unsigned count;
+   for (count = 0; n != 0; count++)
+      n &= n - 1;   /* drops the lowest set bit */
+   return count;
+#endif
+}
+
+
+/** Return the number of bits set in the 64-bit value \p n. */
+static inline unsigned
+util_bitcount_64(uint64_t n)
+{
+#ifdef HAVE___BUILTIN_POPCOUNTLL
+   return __builtin_popcountll(n);
+#else
+   unsigned bits = 0;
+   for (; n; bits++)
+      n &= n - 1;   /* clear the lowest set bit: one pass per set bit */
+   return bits;
+#endif
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UTIL_BITCOUNT_H */
-- 
2.1.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to