--- configure.ac | 1 + scons/gallium.py | 2 + src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 + src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +- src/gallium/auxiliary/util/u_helpers.c | 1 + src/gallium/auxiliary/util/u_math.h | 118 ------------- src/gallium/auxiliary/util/u_vbuf.c | 1 + src/gallium/drivers/i915/i915_state_emit.c | 1 + src/gallium/drivers/ilo/ilo_shader.c | 1 + src/gallium/drivers/ilo/ilo_state.c | 1 + src/gallium/drivers/llvmpipe/lp_rast_tri.c | 1 + src/gallium/drivers/llvmpipe/lp_setup_tri.c | 1 + .../drivers/nouveau/codegen/nv50_ir_peephole.cpp | 5 +- .../drivers/nouveau/codegen/nv50_ir_util.cpp | 1 + src/gallium/drivers/nouveau/nv50/nv50_screen.c | 1 + .../drivers/nouveau/nv50/nv50_shader_state.c | 1 + src/gallium/drivers/r600/evergreen_compute.c | 1 + src/gallium/drivers/r600/r600_blit.c | 1 + src/gallium/drivers/r600/r600_state_common.c | 1 + src/gallium/drivers/radeon/r600_streamout.c | 1 + src/gallium/drivers/radeonsi/si_descriptors.c | 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 3 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 1 + src/gallium/state_trackers/clover/api/memory.cpp | 1 + src/gallium/state_trackers/glx/xlib/glx_api.c | 6 +- src/gallium/state_trackers/glx/xlib/xm_api.c | 10 +- src/mesa/drivers/common/meta.c | 3 +- src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 4 +- src/mesa/drivers/dri/i965/brw_curbe.c | 2 +- src/mesa/drivers/dri/i965/brw_draw.c | 6 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 12 +- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +- src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 2 +- src/mesa/drivers/dri/i965/brw_wm.c | 4 +- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/x11/fakeglx.c | 6 +- src/mesa/drivers/x11/xm_api.c | 16 +- src/mesa/main/bitset.h | 1 + src/mesa/main/buffers.c | 6 +- src/mesa/main/imports.c | 88 --------- src/mesa/main/imports.h | 54 +----- src/mesa/program/program_parse.y | 2 +- src/util/bitcount.h | 196 
+++++++++++++++++++++ 44 files changed, 266 insertions(+), 307 deletions(-) create mode 100644 src/util/bitcount.h
diff --git a/configure.ac b/configure.ac index 03f1bca..be673da 100644 --- a/configure.ac +++ b/configure.ac @@ -130,6 +130,7 @@ fi dnl Check for compiler builtins AX_GCC_BUILTIN([__builtin_bswap32]) AX_GCC_BUILTIN([__builtin_bswap64]) +AX_GCC_BUILTIN([__builtin_clrsb]) AX_GCC_BUILTIN([__builtin_clz]) AX_GCC_BUILTIN([__builtin_clzll]) AX_GCC_BUILTIN([__builtin_ctz]) diff --git a/scons/gallium.py b/scons/gallium.py index dd5ca56..2eb6e91 100755 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -606,6 +606,8 @@ def generate(env): ] if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.5'): cppdefines += ['HAVE___BUILTIN_UNREACHABLE'] + if distutils.version.LooseVersion(ccversion) >= distutils.version.LooseVersion('4.7'): + cppdefines += ['HAVE___BUILTIN_CLRSB'] # Load tools env.Tool('lex') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7794801..d5830b0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -60,6 +60,7 @@ #include "tgsi_exec.h" #include "util/u_memory.h" #include "util/u_math.h" +#include "util/bitcount.h" #define DEBUG_EXECUTION 0 diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 42bc61e..b87a7b0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -41,7 +41,7 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_scan.h" - +#include "util/bitcount.h" diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c index ac1edcd..f8df4b9 100644 --- a/src/gallium/auxiliary/util/u_helpers.c +++ b/src/gallium/auxiliary/util/u_helpers.c @@ -27,6 +27,7 @@ #include "util/u_helpers.h" #include "util/u_inlines.h" +#include "util/bitcount.h" /** * This function is used to copy an array of pipe_vertex_buffer structures, diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h index 0113fb1..6004e96 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -52,10 +52,6 @@ extern "C" { #include <float.h> #include <stdarg.h> -#ifdef PIPE_OS_UNIX -#include <strings.h> /* for ffs */ -#endif - #ifndef M_SQRT2 #define M_SQRT2 1.41421356237309504880 @@ -492,85 +488,6 @@ util_half_inf_sign(int16_t x) return (x < 0) ? -1 : 1; } - -/** - * Find first bit set in word. Least significant bit is 1. - * Return 0 if no bits set. - */ -#ifndef FFS_DEFINED -#define FFS_DEFINED 1 - -#if defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64) -unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask); -#pragma intrinsic(_BitScanForward) -static INLINE -unsigned long ffs( unsigned long u ) -{ - unsigned long i; - if (_BitScanForward(&i, u)) - return i + 1; - else - return 0; -} -#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE -unsigned ffs( unsigned u ) -{ - unsigned i; - - if (u == 0) { - return 0; - } - - __asm bsf eax, [u] - __asm inc eax - __asm mov [i], eax - - return i; -} -#elif defined(__MINGW32__) || defined(PIPE_OS_ANDROID) -#define ffs __builtin_ffs -#endif - -#endif /* FFS_DEFINED */ - -/** - * Find last bit set in a word. The least significant bit is 1. - * Return 0 if no bits are set. - */ -static INLINE unsigned -util_last_bit(unsigned u) -{ -#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) - return u == 0 ? 0 : 32 - __builtin_clz(u); -#else - unsigned r = 0; - while (u) { - r++; - u >>= 1; - } - return r; -#endif -} - -/** - * Find last bit in a word that does not match the sign bit. The least - * significant bit is 1. - * Return 0 if no bits are set. 
- */ -static INLINE unsigned -util_last_bit_signed(int i) -{ -#if defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 407) && !defined(__INTEL_COMPILER) - return 31 - __builtin_clrsb(i); -#else - if (i >= 0) - return util_last_bit(i); - else - return util_last_bit(~(unsigned)i); -#endif -} - /* Destructively loop over all of the bits in a mask as in: * * while (mymask) { @@ -705,41 +622,6 @@ util_next_power_of_two(unsigned x) /** - * Return number of bits set in n. - */ -static INLINE unsigned -util_bitcount(unsigned n) -{ -#if defined(PIPE_CC_GCC) && (PIPE_CC_GCC_VERSION >= 304) - return __builtin_popcount(n); -#else - /* K&R classic bitcount. - * - * For each iteration, clear the LSB from the bitfield. - * Requires only one iteration per set bit, instead of - * one iteration per bit less than highest set bit. - */ - unsigned bits = 0; - for (bits; n; bits++) { - n &= n - 1; - } - return bits; -#endif -} - - -static INLINE unsigned -util_bitcount64(uint64_t n) -{ -#ifdef HAVE___BUILTIN_POPCOUNTLL - return __builtin_popcountll(n); -#else - return util_bitcount(n) + util_bitcount(n >> 32); -#endif -} - - -/** * Reverse bits in n * Algorithm taken from: * http://stackoverflow.com/questions/9144800/c-reverse-bits-in-unsigned-integer diff --git a/src/gallium/auxiliary/util/u_vbuf.c b/src/gallium/auxiliary/util/u_vbuf.c index b1b89bf..2a3b059 100644 --- a/src/gallium/auxiliary/util/u_vbuf.c +++ b/src/gallium/auxiliary/util/u_vbuf.c @@ -96,6 +96,7 @@ #include "translate/translate_cache.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" +#include "util/bitcount.h" struct u_vbuf_elements { unsigned count; diff --git a/src/gallium/drivers/i915/i915_state_emit.c b/src/gallium/drivers/i915/i915_state_emit.c index 6244f48..0503dbc 100644 --- a/src/gallium/drivers/i915/i915_state_emit.c +++ b/src/gallium/drivers/i915/i915_state_emit.c @@ -40,6 +40,7 @@ #include "util/u_format.h" #include "util/u_math.h" #include "util/u_memory.h" +#include 
"util/bitcount.h" struct i915_tracked_hw_state { const char *name; diff --git a/src/gallium/drivers/ilo/ilo_shader.c b/src/gallium/drivers/ilo/ilo_shader.c index f4203aa..0bbeb74 100644 --- a/src/gallium/drivers/ilo/ilo_shader.c +++ b/src/gallium/drivers/ilo/ilo_shader.c @@ -28,6 +28,7 @@ #include "genhw/genhw.h" /* for SBE setup */ #include "tgsi/tgsi_parse.h" #include "intel_winsys.h" +#include "util/bitcount.h" #include "shader/ilo_shader_internal.h" #include "ilo_builder.h" diff --git a/src/gallium/drivers/ilo/ilo_state.c b/src/gallium/drivers/ilo/ilo_state.c index 18c1566..89c489e 100644 --- a/src/gallium/drivers/ilo/ilo_state.c +++ b/src/gallium/drivers/ilo/ilo_state.c @@ -27,6 +27,7 @@ #include "util/u_helpers.h" #include "util/u_upload_mgr.h" +#include "util/bitcount.h" #include "ilo_context.h" #include "ilo_resource.h" diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 41f6fbf..03766ff 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -31,6 +31,7 @@ #include <limits.h> #include "util/u_math.h" +#include "util/bitcount.h" #include "lp_debug.h" #include "lp_perf.h" #include "lp_rast_priv.h" diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c index 900df71..1d1d1e2 100644 --- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_rect.h" #include "util/u_sse.h" +#include "util/bitcount.h" #include "lp_perf.h" #include "lp_setup_context.h" #include "lp_rast.h" diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp index 719f980..e4d4126 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp @@ -23,10 +23,7 @@ #include "codegen/nv50_ir.h" #include 
"codegen/nv50_ir_target.h" #include "codegen/nv50_ir_build_util.h" - -extern "C" { -#include "util/u_math.h" -} +#include "util/bitcount.h" namespace nv50_ir { diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp index d26acb3..d6cef37 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.cpp @@ -21,6 +21,7 @@ */ #include "codegen/nv50_ir_util.h" +#include "util/bitcount.h" namespace nv50_ir { diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c index 825e0ba..e1a302f 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c @@ -26,6 +26,7 @@ #include "util/u_format.h" #include "util/u_format_s3tc.h" #include "pipe/p_screen.h" +#include "util/bitcount.h" #include "nv50/nv50_context.h" #include "nv50/nv50_screen.h" diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c index c698782..9457661 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c @@ -25,6 +25,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "util/u_inlines.h" +#include "util/bitcount.h" #include "nv50/nv50_context.h" diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 38b78c7..71d43d8 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -37,6 +37,7 @@ #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_framebuffer.h" +#include "util/bitcount.h" #include "pipebuffer/pb_buffer.h" #include "evergreend.h" #include "r600_shader.h" diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c index bdc5f9f..b28826e 100644 --- 
a/src/gallium/drivers/r600/r600_blit.c +++ b/src/gallium/drivers/r600/r600_blit.c @@ -25,6 +25,7 @@ #include "evergreen_compute.h" #include "util/u_surface.h" #include "util/u_format.h" +#include "util/bitcount.h" #include "evergreend.h" enum r600_blitter_op /* bitmask */ diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index c3f21cb..793b29c 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -33,6 +33,7 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" #include "util/u_math.h" +#include "util/bitcount.h" #include "tgsi/tgsi_parse.h" void r600_init_command_buffer(struct r600_command_buffer *cb, unsigned num_dw) diff --git a/src/gallium/drivers/radeon/r600_streamout.c b/src/gallium/drivers/radeon/r600_streamout.c index e2413c2..22d3279 100644 --- a/src/gallium/drivers/radeon/r600_streamout.c +++ b/src/gallium/drivers/radeon/r600_streamout.c @@ -28,6 +28,7 @@ #include "r600_cs.h" #include "util/u_memory.h" +#include "util/bitcount.h" static void r600_set_streamout_enable(struct r600_common_context *rctx, bool enable); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 9227034..4bcb248 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -41,6 +41,7 @@ #include "util/u_memory.h" #include "util/u_upload_mgr.h" +#include "util/bitcount.h" #define SI_NUM_CONTEXTS 16 diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 708e42a..5db9053 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -34,6 +34,7 @@ #include "util/u_memory.h" #include "util/u_prim.h" #include "util/u_upload_mgr.h" +#include "util/bitcount.h" /* * Shaders @@ -115,7 +116,7 @@ static void si_shader_gs(struct si_shader *shader) 
si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize); si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE, - util_bitcount64(shader->selector->gs_used_inputs) * (16 >> 2)); + util_bitcount_64(shader->selector->gs_used_inputs) * (16 >> 2)); si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize); si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out); diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 82c58d0..168b77b 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -40,6 +40,7 @@ #include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/bitcount.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/state_trackers/clover/api/memory.cpp b/src/gallium/state_trackers/clover/api/memory.cpp index a094e74..79159f0 100644 --- a/src/gallium/state_trackers/clover/api/memory.cpp +++ b/src/gallium/state_trackers/clover/api/memory.cpp @@ -24,6 +24,7 @@ #include "api/util.hpp" #include "core/memory.hpp" #include "core/format.hpp" +#include "util/bitcount.h" using namespace clover; diff --git a/src/gallium/state_trackers/glx/xlib/glx_api.c b/src/gallium/state_trackers/glx/xlib/glx_api.c index 976791b..9914116 100644 --- a/src/gallium/state_trackers/glx/xlib/glx_api.c +++ b/src/gallium/state_trackers/glx/xlib/glx_api.c @@ -402,9 +402,9 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass ) * 10 bits per color channel. Mesa's limited to a max of 8 bits/channel. 
*/ if (vis && depth > 24 && (xclass==TrueColor || xclass==DirectColor)) { - if (_mesa_bitcount((GLuint) vis->red_mask ) <= 8 && - _mesa_bitcount((GLuint) vis->green_mask) <= 8 && - _mesa_bitcount((GLuint) vis->blue_mask ) <= 8) { + if (util_bitcount((GLuint) vis->red_mask ) <= 8 && + util_bitcount((GLuint) vis->green_mask) <= 8 && + util_bitcount((GLuint) vis->blue_mask ) <= 8) { return vis; } else { diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index 1b77729..74c5637 100644 --- a/src/gallium/state_trackers/glx/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -736,9 +736,9 @@ XMesaVisual XMesaCreateVisual( Display *display, { const int xclass = v->visualType; if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) { - red_bits = _mesa_bitcount(GET_REDMASK(v)); - green_bits = _mesa_bitcount(GET_GREENMASK(v)); - blue_bits = _mesa_bitcount(GET_BLUEMASK(v)); + red_bits = util_bitcount(GET_REDMASK(v)); + green_bits = util_bitcount(GET_GREENMASK(v)); + blue_bits = util_bitcount(GET_BLUEMASK(v)); } else { /* this is an approximation */ @@ -1067,8 +1067,8 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p, if (ctx->Extensions.ARB_texture_non_power_of_two) { target = GLX_TEXTURE_2D_EXT; } - else if ( _mesa_bitcount(b->width) == 1 - && _mesa_bitcount(b->height) == 1) { + else if ( util_bitcount(b->width) == 1 + && util_bitcount(b->height) == 1) { /* power of two size */ if (b->height == 1) { target = GLX_TEXTURE_1D_EXT; diff --git a/src/mesa/drivers/common/meta.c b/src/mesa/drivers/common/meta.c index 87532c1..22a5b3e 100644 --- a/src/mesa/drivers/common/meta.c +++ b/src/mesa/drivers/common/meta.c @@ -85,6 +85,7 @@ #include "main/enums.h" #include "main/glformats.h" #include "util/ralloc.h" +#include "util/bitcount.h" /** Return offset in bytes of the field within a vertex struct */ #define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD)) @@ -1640,7 +1641,7 @@ 
_mesa_meta_drawbuffers_from_bitfield(GLbitfield bits) assert((bits & ~BUFFER_BITS_COLOR) == 0); /* Make sure we don't overflow any arrays. */ - assert(_mesa_bitcount(bits) <= MAX_DRAW_BUFFERS); + assert(util_bitcount(bits) <= MAX_DRAW_BUFFERS); enums[0] = GL_NONE; diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp index 844f5e4..7ccdff5 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp @@ -1346,7 +1346,7 @@ inline int count_trailing_one_bits(unsigned value) #ifdef HAVE___BUILTIN_CTZ return __builtin_ctz(~value); #else - return _mesa_bitcount(value & ~(value + 1)); + return util_bitcount(value & ~(value + 1)); #endif } @@ -1388,7 +1388,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned num_samples) */ unsigned stack_depth = 0; for (unsigned i = 0; i < num_samples; ++i) { - assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */ + assert(stack_depth == util_bitcount(i)); /* Loop invariant */ /* Push sample i onto the stack */ assert(stack_depth < ARRAY_SIZE(texture_data)); diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c index 1a828ed..46a3ccb 100644 --- a/src/mesa/drivers/dri/i965/brw_curbe.c +++ b/src/mesa/drivers/dri/i965/brw_curbe.c @@ -85,7 +85,7 @@ static void calculate_curbe_offsets( struct brw_context *brw ) /* _NEW_TRANSFORM */ if (ctx->Transform.ClipPlanesEnabled) { - GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); + GLuint nr_planes = 6 + util_bitcount_64(ctx->Transform.ClipPlanesEnabled); nr_clip_regs = (nr_planes * 4 + 15) / 16; } diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index b28eaf2..687ab42 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -369,11 +369,11 @@ static bool brw_try_draw_prims( struct gl_context *ctx, * index. 
*/ brw->wm.base.sampler_count = - _mesa_fls(ctx->FragmentProgram._Current->Base.SamplersUsed); + util_last_bit(ctx->FragmentProgram._Current->Base.SamplersUsed); brw->gs.base.sampler_count = ctx->GeometryProgram._Current ? - _mesa_fls(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0; + util_last_bit(ctx->GeometryProgram._Current->Base.SamplersUsed) : 0; brw->vs.base.sampler_count = - _mesa_fls(ctx->VertexProgram._Current->Base.SamplersUsed); + util_last_bit(ctx->VertexProgram._Current->Base.SamplersUsed); /* We have to validate the textures *before* checking for fallbacks; * otherwise, the software fallback won't be able to rely on the diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index aa1d8d2..700c087 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1719,8 +1719,8 @@ fs_visitor::calculate_urb_setup() int urb_next = 0; /* Figure out where each of the incoming setup attributes lands. */ if (brw->gen >= 6) { - if (_mesa_bitcount_64(prog->InputsRead & - BRW_FS_VARYING_INPUT_MASK) <= 16) { + if (util_bitcount_64(prog->InputsRead & + BRW_FS_VARYING_INPUT_MASK) <= 16) { /* The SF/SBE pipeline stage can do arbitrary rearrangement of the * first 16 varying inputs, so we can put them wherever we want. * Just put them in order. 
@@ -3791,11 +3791,11 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; } - if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & - BRW_FS_VARYING_INPUT_MASK) > 16) + if (brw->gen < 6 || util_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; - unsigned sampler_count = _mesa_fls(fp->Base.SamplersUsed); + unsigned sampler_count = util_last_bit(fp->Base.SamplersUsed); for (unsigned i = 0; i < sampler_count; i++) { if (fp->Base.ShadowSamplers & (1 << i)) { /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ @@ -3811,7 +3811,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.drawable_height = ctx->DrawBuffer->Height; } - key.nr_color_regions = _mesa_bitcount_64(fp->Base.OutputsWritten & + key.nr_color_regions = util_bitcount_64(fp->Base.OutputsWritten & ~(BITFIELD64_BIT(FRAG_RESULT_DEPTH) | BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 21dcf2d..62cecb1 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -864,7 +864,7 @@ backend_visitor::invalidate_cfg() void backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) { - int num_textures = _mesa_fls(prog->SamplersUsed); + int num_textures = util_last_bit(prog->SamplersUsed); stage_prog_data->binding_table.texture_start = next_binding_table_offset; next_binding_table_offset += num_textures; diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index df589b8..67af265 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1918,7 +1918,7 @@ brw_vec4_setup_prog_key_for_precompile(struct gl_context *ctx, key->program_string_id = id; key->clamp_vertex_color = ctx->API == 
API_OPENGL_COMPAT; - unsigned sampler_count = _mesa_fls(prog->SamplersUsed); + unsigned sampler_count = util_last_bit(prog->SamplersUsed); for (unsigned i = 0; i < sampler_count; i++) { if (prog->ShadowSamplers & (1 << i)) { /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index db0e6cc..d876bf5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -368,7 +368,7 @@ vec4_gs_visitor::emit_control_data_bits() src_reg prev_count(this, glsl_type::uint_type); emit(ADD(dst_reg(prev_count), this->vertex_count, 0xffffffffu)); unsigned log2_bits_per_vertex = - _mesa_fls(c->control_data_bits_per_vertex); + util_last_bit(c->control_data_bits_per_vertex); emit(SHR(dst_reg(dword_index), prev_count, (uint32_t) (6 - log2_bits_per_vertex))); } diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 5863573..5b692ee 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -539,8 +539,8 @@ static void brw_wm_populate_key( struct brw_context *brw, (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & - BRW_FS_VARYING_INPUT_MASK) > 16) + if (brw->gen < 6 || util_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) key->input_slots_valid = brw->vue_map_geom_out.slots_valid; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index ef46dd7..9436bd2 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -769,7 +769,7 @@ update_stage_texture_surfaces(struct brw_context *brw, else surf_offset += stage_state->prog_data->binding_table.texture_start; - unsigned num_samplers = 
_mesa_fls(prog->SamplersUsed); + unsigned num_samplers = util_last_bit(prog->SamplersUsed); for (unsigned s = 0; s < num_samplers; s++) { surf_offset[s] = 0; diff --git a/src/mesa/drivers/x11/fakeglx.c b/src/mesa/drivers/x11/fakeglx.c index ee05f8a..b1b470a 100644 --- a/src/mesa/drivers/x11/fakeglx.c +++ b/src/mesa/drivers/x11/fakeglx.c @@ -528,9 +528,9 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass ) * 10 bits per color channel. Mesa's limited to a max of 8 bits/channel. */ if (vis && depth > 24 && (xclass==TrueColor || xclass==DirectColor)) { - if (_mesa_bitcount((GLuint) vis->red_mask ) <= 8 && - _mesa_bitcount((GLuint) vis->green_mask) <= 8 && - _mesa_bitcount((GLuint) vis->blue_mask ) <= 8) { + if (util_bitcount((GLuint) vis->red_mask ) <= 8 && + util_bitcount((GLuint) vis->green_mask) <= 8 && + util_bitcount((GLuint) vis->blue_mask ) <= 8) { return vis; } else { diff --git a/src/mesa/drivers/x11/xm_api.c b/src/mesa/drivers/x11/xm_api.c index 2d66dbd..c85b9f6 100644 --- a/src/mesa/drivers/x11/xm_api.c +++ b/src/mesa/drivers/x11/xm_api.c @@ -462,9 +462,9 @@ setup_truecolor(XMesaVisual v, XMesaBuffer buffer, XMesaColormap cmap) 3*16, 11*16, 1*16, 9*16, 15*16, 7*16, 13*16, 5*16, }; - GLint rBits = _mesa_bitcount(rmask); - GLint gBits = _mesa_bitcount(gmask); - GLint bBits = _mesa_bitcount(bmask); + GLint rBits = util_bitcount(rmask); + GLint gBits = util_bitcount(gmask); + GLint bBits = util_bitcount(bmask); GLint maxBits; GLuint i; @@ -827,9 +827,9 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, { const int xclass = v->visualType; if (xclass == GLX_TRUE_COLOR || xclass == GLX_DIRECT_COLOR) { - red_bits = _mesa_bitcount(GET_REDMASK(v)); - green_bits = _mesa_bitcount(GET_GREENMASK(v)); - blue_bits = _mesa_bitcount(GET_BLUEMASK(v)); + red_bits = util_bitcount(GET_REDMASK(v)); + green_bits = util_bitcount(GET_GREENMASK(v)); + blue_bits = util_bitcount(GET_BLUEMASK(v)); } else { /* this is an approximation */ @@ -1091,8 +1091,8 @@ 
XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, if (ctx->Extensions.ARB_texture_non_power_of_two) { target = GLX_TEXTURE_2D_EXT; } - else if ( _mesa_bitcount(width) == 1 - && _mesa_bitcount(height) == 1) { + else if ( util_bitcount(width) == 1 + && util_bitcount(height) == 1) { /* power of two size */ if (height == 1) { target = GLX_TEXTURE_1D_EXT; diff --git a/src/mesa/main/bitset.h b/src/mesa/main/bitset.h index 601fd0e..f50b14f 100644 --- a/src/mesa/main/bitset.h +++ b/src/mesa/main/bitset.h @@ -32,6 +32,7 @@ #define BITSET_H #include "imports.h" +#include "util/bitcount.h" /**************************************************************************** * generic bitset implementation diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 1ee2009..8d50447 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -374,7 +374,7 @@ _mesa_DrawBuffers(GLsizei n, const GLenum *buffers) * Previous versions of the OpenGL specification say INVALID_OPERATION, * but the Khronos conformance tests expect INVALID_ENUM. */ - if (_mesa_bitcount(destMask[output]) > 1) { + if (util_bitcount(destMask[output]) > 1) { _mesa_error(ctx, GL_INVALID_ENUM, "glDrawBuffersARB(buffer)"); return; } @@ -497,7 +497,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, * (ex: glDrawBuffer(GL_FRONT_AND_BACK)). * Otherwise, destMask[x] can only have one bit set. 
*/ - if (n > 0 && _mesa_bitcount(destMask[0]) > 1) { + if (n > 0 && util_bitcount(destMask[0]) > 1) { GLuint count = 0, destMask0 = destMask[0]; while (destMask0) { GLint bufIndex = ffs(destMask0) - 1; @@ -517,7 +517,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, if (destMask[buf]) { GLint bufIndex = ffs(destMask[buf]) - 1; /* only one bit should be set in the destMask[buf] field */ - ASSERT(_mesa_bitcount(destMask[buf]) == 1); + ASSERT(util_bitcount(destMask[buf]) == 1); if (fb->_ColorDrawBufferIndexes[buf] != bufIndex) { updated_drawbuffers(ctx); fb->_ColorDrawBufferIndexes[buf] = bufIndex; diff --git a/src/mesa/main/imports.c b/src/mesa/main/imports.c index b8c7548..ac4b698 100644 --- a/src/mesa/main/imports.c +++ b/src/mesa/main/imports.c @@ -217,94 +217,6 @@ _mesa_align_realloc(void *oldBuffer, size_t oldSize, size_t newSize, /*@{*/ -#ifndef HAVE___BUILTIN_FFS -/** - * Find the first bit set in a word. - */ -int -ffs(int i) -{ - register int bit = 0; - if (i != 0) { - if ((i & 0xffff) == 0) { - bit += 16; - i >>= 16; - } - if ((i & 0xff) == 0) { - bit += 8; - i >>= 8; - } - if ((i & 0xf) == 0) { - bit += 4; - i >>= 4; - } - while ((i & 1) == 0) { - bit++; - i >>= 1; - } - bit++; - } - return bit; -} -#endif - -#ifndef HAVE___BUILTIN_FFSLL -/** - * Find position of first bit set in given value. - * XXX Warning: this function can only be used on 64-bit systems! - * \return position of least-significant bit set, starting at 1, return zero - * if no bits set. - */ -int -ffsll(long long int val) -{ - int bit; - - assert(sizeof(val) == 8); - - bit = ffs((int) val); - if (bit != 0) - return bit; - - bit = ffs((int) (val >> 32)); - if (bit != 0) - return 32 + bit; - - return 0; -} -#endif - - -#ifndef HAVE___BUILTIN_POPCOUNT -/** - * Return number of bits set in given GLuint. 
- */ -unsigned int -_mesa_bitcount(unsigned int n) -{ - unsigned int bits; - for (bits = 0; n > 0; n = n >> 1) { - bits += (n & 1); - } - return bits; -} -#endif - -#ifndef HAVE___BUILTIN_POPCOUNTLL -/** - * Return number of bits set in given 64-bit uint. - */ -unsigned int -_mesa_bitcount_64(uint64_t n) -{ - unsigned int bits; - for (bits = 0; n > 0; n = n >> 1) { - bits += (n & 1); - } - return bits; -} -#endif - /* Using C99 rounding functions for roundToEven() implementation is * difficult, because round(), rint, and nearbyint() are affected by diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index 436d165..0b03816 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -39,6 +39,7 @@ #include "compiler.h" #include "glheader.h" #include "errors.h" +#include "util/bitcount.h" #ifdef __cplusplus extern "C" { @@ -469,59 +470,6 @@ extern void _mesa_exec_free( void *addr ); -#ifndef FFS_DEFINED -#define FFS_DEFINED 1 -#ifdef HAVE___BUILTIN_FFS -#define ffs __builtin_ffs -#else -extern int ffs(int i); -#endif - -#ifdef HAVE___BUILTIN_FFSLL -#define ffsll __builtin_ffsll -#else -extern int ffsll(long long int i); -#endif -#endif /* FFS_DEFINED */ - - -#ifdef HAVE___BUILTIN_POPCOUNT -#define _mesa_bitcount(i) __builtin_popcount(i) -#else -extern unsigned int -_mesa_bitcount(unsigned int n); -#endif - -#ifdef HAVE___BUILTIN_POPCOUNTLL -#define _mesa_bitcount_64(i) __builtin_popcountll(i) -#else -extern unsigned int -_mesa_bitcount_64(uint64_t n); -#endif - -/** - * Find the last (most significant) bit set in a word. - * - * Essentially ffs() in the reverse direction. - */ -static inline unsigned int -_mesa_fls(unsigned int n) -{ -#ifdef HAVE___BUILTIN_CLZ - return n == 0 ? 
0 : 32 - __builtin_clz(n); -#else - unsigned int v = 1; - - if (n == 0) - return 0; - - while (n >>= 1) - v++; - - return v; -#endif -} - extern int _mesa_round_to_even(float val); diff --git a/src/mesa/program/program_parse.y b/src/mesa/program/program_parse.y index 1664740..e79c944 100644 --- a/src/mesa/program/program_parse.y +++ b/src/mesa/program/program_parse.y @@ -2771,7 +2771,7 @@ _mesa_parse_arb_program(struct gl_context *ctx, GLenum target, const GLubyte *st state->prog->NumInstructions++; state->prog->NumParameters = state->prog->Parameters->NumParameters; - state->prog->NumAttributes = _mesa_bitcount_64(state->prog->InputsRead); + state->prog->NumAttributes = util_bitcount_64(state->prog->InputsRead); /* * Initialize native counts to logical counts. The device driver may diff --git a/src/util/bitcount.h b/src/util/bitcount.h new file mode 100644 index 0000000..87d92d5 --- /dev/null +++ b/src/util/bitcount.h @@ -0,0 +1,196 @@ +/* + * Mesa 3-D graphics library + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Bit-scanning helpers: find-first-set, find-last-set and population count.
+ */
+
+#ifndef UTIL_BITCOUNT_H
+#define UTIL_BITCOUNT_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdint.h>
+#ifndef _MSC_VER
+#include <strings.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Find first bit set in word. Least significant bit is 1.
+ * Return 0 if no bits set.
+ */
+#if _SVID_SOURCE || _BSD_SOURCE || _POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700
+/* The C library already provides ffs() under these feature-test macros */
+#elif defined(_MSC_VER) && _MSC_VER >= 1300 && (_M_IX86 || _M_AMD64 || _M_IA64)
+unsigned char _BitScanForward(unsigned long* Index, unsigned long Mask);
+#pragma intrinsic(_BitScanForward)
+static inline
+unsigned long ffs( unsigned long u )
+{
+   unsigned long i;
+   if (_BitScanForward(&i, u))
+      return i + 1; /* intrinsic index is 0-based, ffs() is 1-based */
+   else
+      return 0;
+}
+#elif defined(HAVE___BUILTIN_FFS)
+# define ffs __builtin_ffs
+#else /* portable fallback */
+static inline int
+ffs(int i)
+{
+   int bit = 0;
+   if (i != 0) {
+      if ((i & 0xffff) == 0) { /* skip 16, 8, then 4 low zero bits at a time */
+         bit += 16;
+         i >>= 16;
+      }
+      if ((i & 0xff) == 0) {
+         bit += 8;
+         i >>= 8;
+      }
+      if ((i & 0xf) == 0) {
+         bit += 4;
+         i >>= 4;
+      }
+      while ((i & 1) == 0) { /* at most three single-bit steps remain */
+         bit++;
+         i >>= 1;
+      }
+      bit++; /* convert the 0-based index to ffs()'s 1-based result */
+   }
+   return bit;
+}
+#endif
+
+
+#if defined(_GNU_SOURCE)
+/* The C library is expected to provide ffsll() when _GNU_SOURCE is defined */
+#elif defined(HAVE___BUILTIN_FFSLL)
+# define ffsll __builtin_ffsll
+#else /* portable fallback: scan the low 32 bits, then the high 32 bits */
+static inline int
+ffsll(long long int val)
+{
+   int bit;
+
+   assert(sizeof(val) == 8); /* the 32-bit split below needs a 64-bit type */
+
+   bit = ffs((int) val);
+   if (bit != 0)
+      return bit;
+
+   bit = ffs((int) (val >> 32));
+   if (bit != 0)
+      return 32 + bit;
+
+   return 0;
+}
+#endif
+
+
+/**
+ * Find the last (most significant) bit set in a word.
+ *
+ * Essentially ffs() in the reverse direction.
+ */
+static inline unsigned
+util_last_bit(unsigned n)
+{
+#ifdef HAVE___BUILTIN_CLZ
+   return n == 0 ? 0 : 32 - __builtin_clz(n); /* clz(0) is undefined */
+#else /* portable fallback: count the shifts needed to empty the word */
+   unsigned int v = 1;
+
+   if (n == 0)
+      return 0;
+
+   while (n >>= 1)
+      v++;
+
+   return v;
+#endif
+}
+
+
+/**
+ * Find last bit in a word that does not match the sign bit. The least
+ * significant bit is 1.
+ * Return 0 if no bits are set.
+ */
+static inline unsigned
+util_last_bit_signed(int i)
+{
+#if HAVE___BUILTIN_CLRSB
+   return 31 - __builtin_clrsb(i);
+#else /* fallback: invert negative values so sign bits become leading zeros */
+   if (i >= 0)
+      return util_last_bit(i);
+   else
+      return util_last_bit(~(unsigned)i);
+#endif
+}
+
+/** Return the number of bits set in n. */
+static inline unsigned
+util_bitcount(unsigned n)
+{
+#ifdef HAVE___BUILTIN_POPCOUNT
+   return __builtin_popcount(n);
+#else /* portable fallback: each iteration clears the lowest set bit */
+   unsigned bits = 0;
+   for (; n; bits++) {
+      n &= n - 1;
+   }
+   return bits;
+#endif
+}
+
+/** Return the number of bits set in the 64-bit value n. */
+static inline unsigned
+util_bitcount_64(uint64_t n)
+{
+#ifdef HAVE___BUILTIN_POPCOUNTLL
+   return __builtin_popcountll(n);
+#else /* portable fallback: each iteration clears the lowest set bit */
+   unsigned bits = 0;
+   for (; n; bits++) {
+      n &= n - 1;
+   }
+   return bits;
+#endif
+}
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* UTIL_BITCOUNT_H */
-- 
2.1.0

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev