ARM64 doesn't implement find_first_bit() and find_first_zero_bit() in
arch code and doesn't enable the generic versions in config. This leads
to find_next_bit() being used instead, which is less efficient:

0000000000000000 <find_first_bit>:
   0:   aa0003e4        mov     x4, x0
   4:   aa0103e0        mov     x0, x1
   8:   b4000181        cbz     x1, 38 <find_first_bit+0x38>
   c:   f9400083        ldr     x3, [x4]
  10:   d2800802        mov     x2, #0x40                       // #64
  14:   91002084        add     x4, x4, #0x8
  18:   b40000c3        cbz     x3, 30 <find_first_bit+0x30>
  1c:   14000008        b       3c <find_first_bit+0x3c>
  20:   f8408483        ldr     x3, [x4], #8
  24:   91010045        add     x5, x2, #0x40
  28:   b50000c3        cbnz    x3, 40 <find_first_bit+0x40>
  2c:   aa0503e2        mov     x2, x5
  30:   eb02001f        cmp     x0, x2
  34:   54ffff68        b.hi    20 <find_first_bit+0x20>  // b.pmore
  38:   d65f03c0        ret
  3c:   d2800002        mov     x2, #0x0                        // #0
  40:   dac00063        rbit    x3, x3
  44:   dac01063        clz     x3, x3
  48:   8b020062        add     x2, x3, x2
  4c:   eb02001f        cmp     x0, x2
  50:   9a829000        csel    x0, x0, x2, ls  // ls = plast
  54:   d65f03c0        ret

  ...

0000000000000118 <_find_next_bit.constprop.1>:
 118:   eb02007f        cmp     x3, x2
 11c:   540002e2        b.cs    178 <_find_next_bit.constprop.1+0x60>  // b.hs, b.nlast
 120:   d346fc66        lsr     x6, x3, #6
 124:   f8667805        ldr     x5, [x0, x6, lsl #3]
 128:   b4000061        cbz     x1, 134 <_find_next_bit.constprop.1+0x1c>
 12c:   f8667826        ldr     x6, [x1, x6, lsl #3]
 130:   8a0600a5        and     x5, x5, x6
 134:   ca0400a6        eor     x6, x5, x4
 138:   92800005        mov     x5, #0xffffffffffffffff         // #-1
 13c:   9ac320a5        lsl     x5, x5, x3
 140:   927ae463        and     x3, x3, #0xffffffffffffffc0
 144:   ea0600a5        ands    x5, x5, x6
 148:   54000120        b.eq    16c <_find_next_bit.constprop.1+0x54>  // b.none
 14c:   1400000e        b       184 <_find_next_bit.constprop.1+0x6c>
 150:   d346fc66        lsr     x6, x3, #6
 154:   f8667805        ldr     x5, [x0, x6, lsl #3]
 158:   b4000061        cbz     x1, 164 <_find_next_bit.constprop.1+0x4c>
 15c:   f8667826        ldr     x6, [x1, x6, lsl #3]
 160:   8a0600a5        and     x5, x5, x6
 164:   eb05009f        cmp     x4, x5
 168:   540000c1        b.ne    180 <_find_next_bit.constprop.1+0x68>  // b.any
 16c:   91010063        add     x3, x3, #0x40
 170:   eb03005f        cmp     x2, x3
 174:   54fffee8        b.hi    150 <_find_next_bit.constprop.1+0x38>  // b.pmore
 178:   aa0203e0        mov     x0, x2
 17c:   d65f03c0        ret
 180:   ca050085        eor     x5, x4, x5
 184:   dac000a5        rbit    x5, x5
 188:   dac010a5        clz     x5, x5
 18c:   8b0300a3        add     x3, x5, x3
 190:   eb03005f        cmp     x2, x3
 194:   9a839042        csel    x2, x2, x3, ls  // ls = plast
 198:   aa0203e0        mov     x0, x2
 19c:   d65f03c0        ret

 ...

0000000000000238 <find_next_bit>:
 238:   a9bf7bfd        stp     x29, x30, [sp, #-16]!
 23c:   aa0203e3        mov     x3, x2
 240:   d2800004        mov     x4, #0x0                        // #0
 244:   aa0103e2        mov     x2, x1
 248:   910003fd        mov     x29, sp
 24c:   d2800001        mov     x1, #0x0                        // #0
 250:   97ffffb2        bl      118 <_find_next_bit.constprop.1>
 254:   a8c17bfd        ldp     x29, x30, [sp], #16
 258:   d65f03c0        ret

Enabling these functions would also benefit for_each_{set,clear}_bit().
Would it make sense to enable this config option by default for all
such architectures?

Signed-off-by: Yury Norov <[email protected]>

---
 arch/arm64/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1515f6f153a0..2b90ef1f548e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -106,6 +106,7 @@ config ARM64
        select GENERIC_CPU_AUTOPROBE
        select GENERIC_CPU_VULNERABILITIES
        select GENERIC_EARLY_IOREMAP
+       select GENERIC_FIND_FIRST_BIT
        select GENERIC_IDLE_POLL_SETUP
        select GENERIC_IRQ_IPI
        select GENERIC_IRQ_MULTI_HANDLER
-- 
2.25.1

Reply via email to