On 7/30/19 8:37 AM, Richard Biener wrote:

I've played with different BITMAP_WORD and BITMAP_ELEMENT_WORDS
values and found the code using CTZ and friends a bit unwieldly
to adjust.  So I came up with a set of macros wrapping them
(_maybe_ inline overloads would be a cleaner solution, but ...).

Specifically it looks like for PR91257 making bitmap_element
cache-line aligned and 32byte in size by using an unsigned
int BITMAP_WORD and BITMAP_ELEMENT_WORDS 3 (thus reducing
the load factor from 0.4 to 0.375) is a slight win (in both
memory use and compile-time).  Enlarging to 64 bytes and keeping
unsigned long (and the padding) is worst, even using only
a single unsigned long (but then aligning to 32bytes again)
is faster than the current state.  Surprisingly the division/modulo
by 3 or the larger number of loads/stores didn't matter or were
at least offsetted by reliably touching only a single cache-line
per bitmap_element.

I would guess the optimal setting should depend on the host.

Bootstrap & regtest running on x86_64-unknown-linux-gnu.

Any objection to the macro-ification below?

The built-ins are general purpose (but poorly named) and could be
handy elsewhere (I just recently looked for one of them) so rather
than adding BITMAP_WORD_CTZ et al. to bitmap (and continuing to
support the GCC 3 conditional in the bitmap code) I'd suggest
introducing a general purpose wrapper function with a good name
as the portable interface to __builtin_ctz et al.

Say something like this POC:

  static inline unsigned
  count_trailing_zeros (unsigned x)
  {
  #if (GCC_VERSION >= 3004)
    return __builtin_ctz (x);
  #else
    unsigned n = 0;
    while ((n < sizeof n * CHAR_BIT) && !(x & 1))
      {
        x >>= 1;
        ++n;
       }
    return n;
  #endif
  }

Martin

Richard.

2019-07-30  Richard Biener  <rguent...@suse.de>

        * bitmap.h (BITMAP_WORD_CTZ): New define.
        (BITMAP_WORD_CLZ): Likewise.
        (BITMAP_WORD_POPCOUNT): Likewise.
        (bmp_iter_next_bit): Use BITMAP_WORD_CTZ, remove assert.
        * bitmap.c (bitmap_count_bits_in_word): use BITMAP_WORD_POPCOUNT.
        (bitmap_single_bit_set_p): likewise.
        (bitmap_first_set_bit): Use BITMAP_WORD_CTZ, remove assert.
        (bitmap_last_set_bit): Use BITMAP_WORD_CLZ, remove assert.

Index: gcc/bitmap.c
===================================================================
--- gcc/bitmap.c        (revision 273795)
+++ gcc/bitmap.c        (working copy)
@@ -1016,7 +1020,7 @@ bitmap_count_bits_in_word (const BITMAP_
  #if GCC_VERSION >= 3400
        /* Note that popcountl matches BITMAP_WORD in type, so the actual size
         of BITMAP_WORD is not material.  */
-      count += __builtin_popcountl (bits[ix]);
+      count += BITMAP_WORD_POPCOUNT (bits[ix]);
  #else
        count += bitmap_popcount (bits[ix]);
  #endif
@@ -1101,7 +1105,7 @@ bitmap_single_bit_set_p (const_bitmap a)
  #if GCC_VERSION >= 3400
        /* Note that popcountl matches BITMAP_WORD in type, so the actual size
         of BITMAP_WORD is not material.  */
-      count += __builtin_popcountl (elt->bits[ix]);
+      count += BITMAP_WORD_POPCOUNT (elt->bits[ix]);
  #else
        count += bitmap_popcount (elt->bits[ix]);
  #endif
@@ -1142,8 +1146,7 @@ bitmap_first_set_bit (const_bitmap a)
    bit_no += ix * BITMAP_WORD_BITS;
#if GCC_VERSION >= 3004
-  gcc_assert (sizeof (long) == sizeof (word));
-  bit_no += __builtin_ctzl (word);
+  bit_no += BITMAP_WORD_CTZ (word);
  #else
    /* Binary search for the first set bit.  */
  #if BITMAP_WORD_BITS > 64
@@ -1200,8 +1203,7 @@ bitmap_last_set_bit (const_bitmap a)
   found_bit:
    bit_no += ix * BITMAP_WORD_BITS;
  #if GCC_VERSION >= 3004
-  gcc_assert (sizeof (long) == sizeof (word));
-  bit_no += BITMAP_WORD_BITS - __builtin_clzl (word) - 1;
+  bit_no += BITMAP_WORD_BITS - BITMAP_WORD_CLZ (word) - 1;
  #else
    /* Hopefully this is a twos-complement host...  */
    BITMAP_WORD x = word;
Index: gcc/bitmap.h
===================================================================
--- gcc/bitmap.h        (revision 273795)
+++ gcc/bitmap.h        (working copy)
@@ -272,6 +272,10 @@ extern mem_alloc_description<bitmap_usag
  /* Fundamental storage type for bitmap.  */
typedef unsigned long BITMAP_WORD;
+/* Word primitives.  */
+#define BITMAP_WORD_CTZ __builtin_ctzl
+#define BITMAP_WORD_CLZ __builtin_clzl
+#define BITMAP_WORD_POPCOUNT __builtin_popcountl
  /* BITMAP_WORD_BITS needs to be unsigned, but cannot contain casts as
     it is used in preprocessor directives -- hence the 1u.  */
  #define BITMAP_WORD_BITS (CHAR_BIT * SIZEOF_LONG * 1u)
@@ -683,8 +688,7 @@ bmp_iter_next_bit (bitmap_iterator * bi,
  {
  #if (GCC_VERSION >= 3004)
    {
-    unsigned int n = __builtin_ctzl (bi->bits);
-    gcc_assert (sizeof (unsigned long) == sizeof (BITMAP_WORD));
+    unsigned int n = BITMAP_WORD_CTZ (bi->bits);
      bi->bits >>= n;
      *bit_no += n;
    }


Reply via email to