On Thu, 19 Aug 2021, Jakub Jelinek wrote:

> Hi!
> 
> As suggested in the PR, the following patch adds two new clrsb
> expansion possibilities for when the target doesn't have clrsb_optab
> for the requested mode or any wider mode, but does have clz_optab for
> the requested mode.
> One expansion is
> clrsb (op0)
> expands as
> clz (op0 ^ (((stype)op0) >> (prec-1))) - 1
> which is usable if CLZ_DEFINED_VALUE_AT_ZERO is 2 with value
> of prec, because the clz argument can be 0 and clrsb should give
> prec-1 in that case.
> The other expansion is
> clz (((op0 << 1) ^ (((stype)op0) >> (prec-1))) | 1)
> where the clz argument is never 0, but it is one operation longer.
> E.g. on x86_64-linux with -O2 -mno-lzcnt, this results for
> int foo (int x) { return __builtin_clrsb (x); }
> in
> -     subq    $8, %rsp
> -     movslq  %edi, %rdi
> -     call    __clrsbdi2
> -     addq    $8, %rsp
> -     subl    $32, %eax
> +     leal    (%rdi,%rdi), %eax
> +     sarl    $31, %edi
> +     xorl    %edi, %eax
> +     orl     $1, %eax
> +     bsrl    %eax, %eax
> +     xorl    $31, %eax
> and with -O2 -mlzcnt:
> +     movl    %edi, %eax
> +     sarl    $31, %eax
> +     xorl    %edi, %eax
> +     lzcntl  %eax, %eax
> +     subl    $1, %eax
> On armv7hl-linux-gnueabi with -O2:
> -     push    {r4, lr}
> -     bl      __clrsbsi2
> -     pop     {r4, pc}
> +     @ link register save eliminated.
> +     eor     r0, r0, r0, asr #31
> +     clz     r0, r0
> +     sub     r0, r0, #1
> +     bx      lr
> As it (at least usually) will make code larger, it is
> disabled for -Os or cold instructions.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Richard.

> 2021-08-19  Jakub Jelinek  <ja...@redhat.com>
> 
>       PR middle-end/101950
>       * optabs.c (expand_clrsb_using_clz): New function.
>       (expand_unop): Use it as another clrsb expansion fallback.
> 
>       * gcc.target/i386/pr101950-1.c: New test.
>       * gcc.target/i386/pr101950-2.c: New test.
> 
> --- gcc/optabs.c.jj   2021-07-15 10:16:13.027581160 +0200
> +++ gcc/optabs.c      2021-08-18 13:36:56.410818265 +0200
> @@ -2600,6 +2600,82 @@ widen_leading (scalar_int_mode mode, rtx
>    return 0;
>  }
>  
> +/* Attempt to emit (clrsb:mode op0) as
> +   (plus:mode (clz:mode (xor:mode op0 (ashr:mode op0 (const_int prec-1))))
> +           (const_int -1))
> +   if CLZ_DEFINED_VALUE_AT_ZERO (mode, val) is 2 and val is prec,
> +   or as
> +   (clz:mode (ior:mode (xor:mode (ashl:mode op0 (const_int 1))
> +                              (ashr:mode op0 (const_int prec-1)))
> +                    (const_int 1)))
> +   otherwise.  */
> +
> +static rtx
> +expand_clrsb_using_clz (scalar_int_mode mode, rtx op0, rtx target)
> +{
> +  if (optimize_insn_for_size_p ()
> +      || optab_handler (clz_optab, mode) == CODE_FOR_nothing)
> +    return NULL_RTX;
> +
> +  start_sequence ();
> +  HOST_WIDE_INT val = 0;
> +  if (CLZ_DEFINED_VALUE_AT_ZERO (mode, val) != 2
> +      || val != GET_MODE_PRECISION (mode))
> +    val = 0;
> +  else
> +    val = 1;
> +
> +  rtx temp2 = op0;
> +  if (!val)
> +    {
> +      temp2 = expand_binop (mode, ashl_optab, op0, const1_rtx,
> +                         NULL_RTX, 0, OPTAB_DIRECT);
> +      if (!temp2)
> +     {
> +     fail:
> +       end_sequence ();
> +       return NULL_RTX;
> +     }
> +    }
> +
> +  rtx temp = expand_binop (mode, ashr_optab, op0,
> +                        GEN_INT (GET_MODE_PRECISION (mode) - 1),
> +                        NULL_RTX, 0, OPTAB_DIRECT);
> +  if (!temp)
> +    goto fail;
> +
> +  temp = expand_binop (mode, xor_optab, temp2, temp, NULL_RTX, 0,
> +                    OPTAB_DIRECT);
> +  if (!temp)
> +    goto fail;
> +
> +  if (!val)
> +    {
> +      temp = expand_binop (mode, ior_optab, temp, const1_rtx,
> +                        NULL_RTX, 0, OPTAB_DIRECT);
> +      if (!temp)
> +     goto fail;
> +    }
> +  temp = expand_unop_direct (mode, clz_optab, temp, val ? NULL_RTX : target,
> +                          true);
> +  if (!temp)
> +    goto fail;
> +  if (val)
> +    {
> +      temp = expand_binop (mode, add_optab, temp, constm1_rtx,
> +                        target, 0, OPTAB_DIRECT);
> +      if (!temp)
> +     goto fail;
> +    }
> +
> +  rtx_insn *seq = get_insns ();
> +  end_sequence ();
> +
> +  add_equal_note (seq, temp, CLRSB, op0, NULL_RTX, mode);
> +  emit_insn (seq);
> +  return temp;
> +}
> +
>  /* Try calculating clz of a double-word quantity as two clz's of word-sized
>     quantities, choosing which based on whether the high word is nonzero.  */
>  static rtx
> @@ -3171,6 +3247,9 @@ expand_unop (machine_mode mode, optab un
>         temp = widen_leading (int_mode, op0, target, unoptab);
>         if (temp)
>           return temp;
> +       temp = expand_clrsb_using_clz (int_mode, op0, target);
> +       if (temp)
> +         return temp;
>       }
>        goto try_libcall;
>      }
> --- gcc/testsuite/gcc.target/i386/pr101950-1.c.jj     2021-08-18 13:58:05.363093681 +0200
> +++ gcc/testsuite/gcc.target/i386/pr101950-1.c        2021-08-18 14:01:22.905335834 +0200
> @@ -0,0 +1,20 @@
> +/* PR middle-end/101950 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-lzcnt" } */
> +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
> +/* { dg-final { scan-assembler-times "\tbsr\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\txor\[ql]\t" 4 } } */
> +/* { dg-final { scan-assembler-times "\tor\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
> +
> +int
> +foo (long x)
> +{
> +  return __builtin_clrsbl (x);
> +}
> +
> +int
> +bar (int x)
> +{
> +  return __builtin_clrsb (x);
> +}
> --- gcc/testsuite/gcc.target/i386/pr101950-2.c.jj     2021-08-18 13:58:11.367009865 +0200
> +++ gcc/testsuite/gcc.target/i386/pr101950-2.c        2021-08-18 14:01:18.540396835 +0200
> @@ -0,0 +1,19 @@
> +/* PR middle-end/101950 */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mlzcnt" } */
> +/* { dg-final { scan-assembler-not "call\[^\n\r]*__clrsb.i2" } } */
> +/* { dg-final { scan-assembler-times "\tlzcnt\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\txor\[ql]\t" 2 } } */
> +/* { dg-final { scan-assembler-times "\tsar\[ql]\t|\tcltd" 2 } } */
> +
> +int
> +foo (long x)
> +{
> +  return __builtin_clrsbl (x);
> +}
> +
> +int
> +bar (int x)
> +{
> +  return __builtin_clrsb (x);
> +}
> 
>       Jakub
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Reply via email to