On 7/30/19 8:37 AM, Richard Biener wrote:
I've played with different BITMAP_WORD and BITMAP_ELEMENT_WORDS
values and found the code using CTZ and friends a bit unwieldly
to adjust. So I came up with a set of macros wrapping them
(_maybe_ inline overloads would be a cleaner solution, but ...).
Specifically it looks like for PR91257 making bitmap_element
cache-line aligned and 32byte in size by using an unsigned
int BITMAP_WORD and BITMAP_ELEMENT_WORDS 3 (thus reducing
the load factor from 0.4 to 0.375) is a slight win (in both
memory use and compile-time). Enlarging to 64 bytes and keeping
unsigned long (and the padding) is worst, even using only
a single unsigned long (but then aligning to 32bytes again)
is faster than the current state. Surprisingly the division/modulo
by 3 or the larger number of loads/stores didn't matter or were
at least offsetted by reliably touching only a single cache-line
per bitmap_element.
I would guess the optimal setting should depend on the host.
Bootstrap & regtest running on x86_64-unknown-linux-gnu.
Any objection to the macro-ification below?
The built-ins are general purpose (but poorly named) and could be
handy elsewhere (I just recently looked for one of them) so rather
than adding BITMAP_WORD_CTZ et al. to bitmap (and continuing to
support the GCC 3 conditional in the bitmap code) I'd suggest
introducing a general purpose wrapper function with a good name
as the portable interface to __builtin_ctz et al.
Say something like this POC:
static inline unsigned
count_trailing_zeros (unsigned x)
{
#if (GCC_VERSION >= 3004)
return __builtin_ctz (x);
#else
unsigned n = 0;
while ((n < sizeof n * CHAR_BIT) && !(x & 1))
{
x >>= 1;
++n;
}
return n;
#endif
}
Martin
Richard.
2019-07-30 Richard Biener <rguent...@suse.de>
* bitmap.h (BITMAP_WORD_CTZ): New define.
(BITMAP_WORD_CLZ): Likewise.
(BITMAP_WORD_POPCOUNT): Likewise.
(bmp_iter_next_bit): Use BITMAP_WORD_CTZ, remove assert.
* bitmap.c (bitmap_count_bits_in_word): use BITMAP_WORD_POPCOUNT.
(bitmap_single_bit_set_p): likewise.
(bitmap_first_set_bit): Use BITMAP_WORD_CTZ, remove assert.
(bitmap_last_set_bit): Use BITMAP_WORD_CLZ, remove assert.
Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c (revision 273795)
+++ gcc/bitmap.c (working copy)
@@ -1016,7 +1020,7 @@ bitmap_count_bits_in_word (const BITMAP_
#if GCC_VERSION >= 3400
/* Note that popcountl matches BITMAP_WORD in type, so the actual size
of BITMAP_WORD is not material. */
- count += __builtin_popcountl (bits[ix]);
+ count += BITMAP_WORD_POPCOUNT (bits[ix]);
#else
count += bitmap_popcount (bits[ix]);
#endif
@@ -1101,7 +1105,7 @@ bitmap_single_bit_set_p (const_bitmap a)
#if GCC_VERSION >= 3400
/* Note that popcountl matches BITMAP_WORD in type, so the actual size
of BITMAP_WORD is not material. */
- count += __builtin_popcountl (elt->bits[ix]);
+ count += BITMAP_WORD_POPCOUNT (elt->bits[ix]);
#else
count += bitmap_popcount (elt->bits[ix]);
#endif
@@ -1142,8 +1146,7 @@ bitmap_first_set_bit (const_bitmap a)
bit_no += ix * BITMAP_WORD_BITS;
#if GCC_VERSION >= 3004
- gcc_assert (sizeof (long) == sizeof (word));
- bit_no += __builtin_ctzl (word);
+ bit_no += BITMAP_WORD_CTZ (word);
#else
/* Binary search for the first set bit. */
#if BITMAP_WORD_BITS > 64
@@ -1200,8 +1203,7 @@ bitmap_last_set_bit (const_bitmap a)
found_bit:
bit_no += ix * BITMAP_WORD_BITS;
#if GCC_VERSION >= 3004
- gcc_assert (sizeof (long) == sizeof (word));
- bit_no += BITMAP_WORD_BITS - __builtin_clzl (word) - 1;
+ bit_no += BITMAP_WORD_BITS - BITMAP_WORD_CLZ (word) - 1;
#else
/* Hopefully this is a twos-complement host... */
BITMAP_WORD x = word;
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h (revision 273795)
+++ gcc/bitmap.h (working copy)
@@ -272,6 +272,10 @@ extern mem_alloc_description<bitmap_usag
/* Fundamental storage type for bitmap. */
typedef unsigned long BITMAP_WORD;
+/* Word primitives. */
+#define BITMAP_WORD_CTZ __builtin_ctzl
+#define BITMAP_WORD_CLZ __builtin_clzl
+#define BITMAP_WORD_POPCOUNT __builtin_popcountl
/* BITMAP_WORD_BITS needs to be unsigned, but cannot contain casts as
it is used in preprocessor directives -- hence the 1u. */
#define BITMAP_WORD_BITS (CHAR_BIT * SIZEOF_LONG * 1u)
@@ -683,8 +688,7 @@ bmp_iter_next_bit (bitmap_iterator * bi,
{
#if (GCC_VERSION >= 3004)
{
- unsigned int n = __builtin_ctzl (bi->bits);
- gcc_assert (sizeof (unsigned long) == sizeof (BITMAP_WORD));
+ unsigned int n = BITMAP_WORD_CTZ (bi->bits);
bi->bits >>= n;
*bit_no += n;
}