On Wed, Aug 10, 2016 at 04:00:16PM +0000, Bin Cheng wrote:
> Hi,
> This is a follow-up patch to the previous vcond patches.  In those
> patches we relied on the combiner to simplify "X = !Y; Z = X ? A : B"
> into "Z = Y ? B : A".  That works for some cases, but not all of them,
> for example the case in PR69848.  The reason could be in the combiner,
> but is more likely in the bsl patterns, which are too complicated for
> the combiner to handle.  Investigating all cases pattern by pattern
> would be tedious, so this patch modifies the vcond patterns to
> explicitly invert the comparison code (and swap the operands) in order
> to avoid the additional NOT instruction.  Note that unordered floating
> point comparisons are not handled because they would complicate the
> code; also, NE is the most common case.  The patch further reduces the
> assembly code for PR69848 on top of the vcond patches.
> Bootstrapped and tested on AArch64.  Is it OK?
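
For reference, a minimal C example of the shape this targets (illustrative
only, not the PR69848 reproducer; all names are made up).  Before the
patch a vectorised NE select like this would expand along the lines of a
compare (cmeq), a NOT of the mask, and a bsl; with the NE rewritten as EQ
and the two select arms swapped, the NOT is no longer needed:

/* Each lane of r gets x[i] when a[i] != b[i], else y[i].  Rewriting the
   NE comparison as EQ and swapping the two arms removes the separate
   NOT of the comparison mask.  */
int a[128], b[128], x[128], y[128], r[128];

void
select_ne (void)
{
  int i;
  for (i = 0; i < 128; i++)
    r[i] = (a[i] != b[i]) ? x[i] : y[i];
}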

OK for trunk.

It would be good to reduce some of the code duplication we have in
these vcond patterns. Right now we have 4 copies of essentially
identical code.
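
One possible shape for that (a sketch only; the helper name is made up
and it is not part of the posted patch) would be to factor the repeated
NE handling into a small helper and call it from each expander:

/* Hypothetical helper: rewrite a NE comparison as EQ and swap the two
   select arms so the expansion does not need an extra NOT of the mask.
   Mirrors the block repeated in each vcond/vcondu expander above.  */
static void
aarch64_vcond_invert_ne (rtx *operands)
{
  if (GET_CODE (operands[3]) == NE)
    {
      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
				    operands[4], operands[5]);
      std::swap (operands[1], operands[2]);
    }
}

Each expander body would then just call aarch64_vcond_invert_ne (operands)
before emitting the vec_cmp.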

Thanks,
James

> 2016-08-03  Bin Cheng  <bin.ch...@arm.com>
> 
>       PR tree-optimization/69848
>       * config/aarch64/aarch64-simd.md (vcond<mode><mode>): Invert NE
>       and switch operands to avoid additional NOT instruction.
>       (vcond<v_cmp_mixed><mode>): Ditto.
>       (vcondu<mode><mode>, vcondu<mode><v_cmp_mixed>): Ditto.
> 
> gcc/testsuite/ChangeLog
> 2016-08-03  Bin Cheng  <bin.ch...@arm.com>
> 
>       PR tree-optimization/69848
>       * gcc.target/aarch64/simd/vcond-ne-bit.c: New test.

> diff --git a/gcc/config/aarch64/aarch64-simd.md 
> b/gcc/config/aarch64/aarch64-simd.md
> index dca079f..3fa88be 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -2575,6 +2575,15 @@
>    rtx mask = gen_reg_rtx (<V_cmp_result>mode);
>    enum rtx_code code = GET_CODE (operands[3]);
>  
> +  /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
> +     it and swap operands 1/2 in order to avoid the additional NOT
> +     instruction.  */
> +  if (code == NE)
> +    {
> +      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
> +                                 operands[4], operands[5]);
> +      std::swap (operands[1], operands[2]);
> +    }
>    emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
>                                             operands[4], operands[5]));
>    emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
> @@ -2596,6 +2605,15 @@
>    rtx mask = gen_reg_rtx (<V_cmp_result>mode);
>    enum rtx_code code = GET_CODE (operands[3]);
>  
> +  /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
> +     it and swap operands 1/2 in order to avoid the additional NOT
> +     instruction.  */
> +  if (code == NE)
> +    {
> +      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
> +                                 operands[4], operands[5]);
> +      std::swap (operands[1], operands[2]);
> +    }
>    emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
>                                             operands[4], operands[5]));
>    emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
> @@ -2618,6 +2636,15 @@
>    rtx mask = gen_reg_rtx (<MODE>mode);
>    enum rtx_code code = GET_CODE (operands[3]);
>  
> +  /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
> +     it and swap operands 1/2 in order to avoid the additional NOT
> +     instruction.  */
> +  if (code == NE)
> +    {
> +      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
> +                                 operands[4], operands[5]);
> +      std::swap (operands[1], operands[2]);
> +    }
>    emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
>                                     operands[4], operands[5]));
>    emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
> @@ -2638,6 +2665,15 @@
>    rtx mask = gen_reg_rtx (<V_cmp_result>mode);
>    enum rtx_code code = GET_CODE (operands[3]);
>  
> +  /* NE is handled as !EQ in vec_cmp patterns, so we can explicitly invert
> +     it and swap operands 1/2 in order to avoid the additional NOT
> +     instruction.  */
> +  if (code == NE)
> +    {
> +      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
> +                                 operands[4], operands[5]);
> +      std::swap (operands[1], operands[2]);
> +    }
>    emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
>                                                 mask, operands[3],
>                                                 operands[4], operands[5]));
> diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c 
> b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
> new file mode 100644
> index 0000000..25170c2
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/simd/vcond-ne-bit.c
> @@ -0,0 +1,32 @@
> +/* { dg-do run } */
> +/* { dg-options "-save-temps" } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-require-effective-target vect_condition } */
> +#include <stdlib.h>
> +
> +int fn1 (int) __attribute__ ((noinline));
> +
> +int a[128];
> +int fn1(int d) {
> +  int b, c = 1;
> +  for (b = 0; b < 128; b++)
> +    if (a[b])
> +      c = 0;
> +  return c;
> +}
> +
> +int
> +main (void)
> +{
> +  int i;
> +  for (i = 0; i < 128; i++)
> +    a[i] = 0;
> +  if (fn1(10) != 1)
> +    abort ();
> +  a[3] = 2;
> +  a[24] = 1;
> +  if (fn1(10) != 0)
> +    abort ();
> +  return 0;
> +}
> +/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */
