> -----Original Message-----
> From: Richard Sandiford <richard.sandif...@arm.com>
> Sent: 06 May 2020 11:28
> To: Alex Coplan <alex.cop...@arm.com>
> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw <richard.earns...@arm.com>;
> Marcus Shawcroft <marcus.shawcr...@arm.com>; Kyrylo Tkachov
> <kyrylo.tkac...@arm.com>; nd <n...@arm.com>
> Subject: Re: [PATCH] aarch64: prefer using csinv, csneg in zero extend contexts
>
> Alex Coplan <alex.cop...@arm.com> writes:
> >> -----Original Message-----
> >> From: Richard Sandiford <richard.sandif...@arm.com>
> >> Sent: 30 April 2020 15:13
> >> To: Alex Coplan <alex.cop...@arm.com>
> >> Cc: gcc-patches@gcc.gnu.org; Richard Earnshaw <richard.earns...@arm.com>;
> >> Marcus Shawcroft <marcus.shawcr...@arm.com>; Kyrylo Tkachov
> >> <kyrylo.tkac...@arm.com>; nd <n...@arm.com>
> >> Subject: Re: [PATCH] aarch64: prefer using csinv, csneg in zero extend contexts
> >>
> >> Yeah, I was hoping for something like...
> >>
> >> > Indeed, clang generates a MVN + CSEL sequence where the CSEL operates
> >> > on the 64-bit registers:
> >> >
> >> > f:
> >> >         mvn     w8, w2
> >> >         cmp     w0, #0
> >> >         csel    x0, x8, x1, eq
> >> >         ret
> >>
> >> ...this rather than the 4-insn (+ret) sequence that we currently
> >> generate.  So it would have been a define_insn_and_split that handles
> >> the zero case directly but splits into the "optimal" two-instruction
> >> sequence for registers.
> >>
> >> But I guess the underlying problem is instead that we don't have
> >> a pattern for (zero_extend:DI (not:SI ...)).  Adding that would
> >> definitely be a better fix.
> >
> > Yes. I sent a patch for this very fix which Kyrill is going to commit
> > once stage 1 opens:
> > https://gcc.gnu.org/pipermail/gcc-patches/2020-April/544365.html
>
> Sorry, missed that.
>
> It looks like that patch hinders this one though.  Trying it with
> current master (where that patch is applied), I get:
>
>   FAIL: gcc.target/aarch64/csinv-neg.c check-function-bodies inv_zero1
>   FAIL: gcc.target/aarch64/csinv-neg.c check-function-bodies inv_zero2
>
> It looks like a costs issue:
>
> Trying 27 -> 18:
>    27: r99:DI=zero_extend(~r101:SI)
>       REG_DEAD r101:SI
>    18: x0:DI={(cc:CC==0)?r99:DI:0}
>       REG_DEAD cc:CC
>       REG_DEAD r99:DI
> Successfully matched this instruction:
> (set (reg/i:DI 0 x0)
>     (if_then_else:DI (eq (reg:CC 66 cc)
>             (const_int 0 [0]))
>         (zero_extend:DI (not:SI (reg:SI 101)))
>         (const_int 0 [0])))
> rejecting combination of insns 27 and 18
> original costs 4 + 4 = 8
> replacement cost 12
>
> I guess we'll need to teach aarch64_if_then_else_costs about the costs
> of the new insns.
Ah, thanks for catching this. I've attached an updated patch which fixes the
costs issue here. With the new patch, all the test cases in csinv-neg.c now
pass. In addition, I've done a bootstrap and regtest on aarch64-linux with no
new failures.

Do you think we need to add cases to aarch64_if_then_else_costs for the other
new insns, or is the attached patch OK for master?

Thanks,
Alex

---

gcc/ChangeLog:

2020-05-07  Alex Coplan  <alex.cop...@arm.com>

        * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Add case
        to correctly calculate cost for new pattern (*csinv3_uxtw_insn3).
        * config/aarch64/aarch64.md (*csinv3_uxtw_insn1): New.
        (*csinv3_uxtw_insn2): New.
        (*csinv3_uxtw_insn3): New.
        * config/aarch64/iterators.md (neg_not_cs): New.

gcc/testsuite/ChangeLog:

2020-05-07  Alex Coplan  <alex.cop...@arm.com>

        * gcc.target/aarch64/csinv-neg.c: New test.
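For the record, the case this cost change (and the two previously-failing
tests) covers is a conditional select between a zero-extended NOT/NEG result
and zero. A minimal example, mirroring the inv_zero2 test added by the patch
below, so nothing here goes beyond what the patch itself tests:

    /* Built with -O2 for aarch64: the whole select should now be matched
       by the new *csinv3_uxtw_insn3 pattern and costed as a single
       csel-class instruction, rather than as a separate not + select.  */
    unsigned long long
    inv_zero2 (unsigned a, unsigned b)
    {
      return a ? ~b : 0;
    }

for which the expected code (as checked in csinv-neg.c) is:

        cmp     w0, 0
        csinv   w0, wzr, w1, eq
        ret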
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e92c7e69fcb..efb3da7a7fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11695,6 +11695,15 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed)
 	  op1 = XEXP (op1, 0);
 	  op2 = XEXP (op2, 0);
 	}
+      else if (GET_CODE (op1) == ZERO_EXTEND && op2 == const0_rtx)
+	{
+	  inner = XEXP (op1, 0);
+	  if (GET_CODE (inner) == NEG || GET_CODE (inner) == NOT)
+	    {
+	      /* CSINV/NEG with zero extend + const 0 (*csinv3_uxtw_insn3).  */
+	      op1 = XEXP (inner, 0);
+	    }
+	}
 
       *cost += rtx_cost (op1, VOIDmode, IF_THEN_ELSE, 1, speed);
       *cost += rtx_cost (op2, VOIDmode, IF_THEN_ELSE, 2, speed);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ff15505d455..b2cfd015530 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4391,6 +4391,44 @@
   [(set_attr "type" "csel")]
 )
 
+(define_insn "*csinv3_uxtw_insn1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(if_then_else:DI
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (zero_extend:DI
+	    (match_operand:SI 2 "register_operand" "r"))
+	  (zero_extend:DI
+	    (NEG_NOT:SI (match_operand:SI 3 "register_operand" "r")))))]
+  ""
+  "cs<neg_not_cs>\\t%w0, %w2, %w3, %m1"
+  [(set_attr "type" "csel")]
+)
+
+(define_insn "*csinv3_uxtw_insn2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(if_then_else:DI
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (zero_extend:DI
+	    (NEG_NOT:SI (match_operand:SI 2 "register_operand" "r")))
+	  (zero_extend:DI
+	    (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "cs<neg_not_cs>\\t%w0, %w3, %w2, %M1"
+  [(set_attr "type" "csel")]
+)
+
+(define_insn "*csinv3_uxtw_insn3"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(if_then_else:DI
+	  (match_operand 1 "aarch64_comparison_operation" "")
+	  (zero_extend:DI
+	    (NEG_NOT:SI (match_operand:SI 2 "register_operand" "r")))
+	  (const_int 0)))]
+  ""
+  "cs<neg_not_cs>\\t%w0, wzr, %w2, %M1"
+  [(set_attr "type" "csel")]
+)
+
 ;; If X can be loaded by a single CNT[BHWD] instruction,
 ;;
 ;; A = UMAX (B, X)
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 8e434389e59..a568cf21b99 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1932,6 +1932,9 @@
 ;; Operation names for negate and bitwise complement.
 (define_code_attr neg_not_op [(neg "neg") (not "not")])
 
+;; csinv, csneg insn suffixes.
+(define_code_attr neg_not_cs [(neg "neg") (not "inv")])
+
 ;; Similar, but when the second operand is inverted.
 (define_code_attr nlogical [(and "bic") (ior "orn") (xor "eon")])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/csinv-neg.c b/gcc/testsuite/gcc.target/aarch64/csinv-neg.c
new file mode 100644
index 00000000000..cc64b4094d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/csinv-neg.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+/*
+** inv1:
+**	cmp	w0, 0
+**	csinv	w0, w1, w2, ne
+**	ret
+*/
+unsigned long long
+inv1(unsigned a, unsigned b, unsigned c)
+{
+  return a ? b : ~c;
+}
+
+/*
+** inv1_local:
+**	cmp	w0, 0
+**	csinv	w0, w1, w2, ne
+**	ret
+*/
+unsigned long long
+inv1_local(unsigned a, unsigned b, unsigned c)
+{
+  unsigned d = ~c;
+  return a ? b : d;
+}
+
+/*
+** inv_zero1:
+**	cmp	w0, 0
+**	csinv	w0, wzr, w1, ne
+**	ret
+*/
+unsigned long long
+inv_zero1(unsigned a, unsigned b)
+{
+  return a ? 0 : ~b;
+}
+
+/*
+** inv_zero2:
+**	cmp	w0, 0
+**	csinv	w0, wzr, w1, eq
+**	ret
+*/
+unsigned long long
+inv_zero2(unsigned a, unsigned b)
+{
+  return a ? ~b : 0;
+}
+
+
+/*
+** inv2:
+**	cmp	w0, 0
+**	csinv	w0, w2, w1, eq
+**	ret
+*/
+unsigned long long
+inv2(unsigned a, unsigned b, unsigned c)
+{
+  return a ? ~b : c;
+}
+
+/*
+** inv2_local:
+**	cmp	w0, 0
+**	csinv	w0, w2, w1, eq
+**	ret
+*/
+unsigned long long
+inv2_local(unsigned a, unsigned b, unsigned c)
+{
+  unsigned d = ~b;
+  return a ? d : c;
+}
+
+/*
+** neg1:
+**	cmp	w0, 0
+**	csneg	w0, w1, w2, ne
+**	ret
+*/
+unsigned long long
+neg1(unsigned a, unsigned b, unsigned c)
+{
+  return a ? b : -c;
+}
+
+
+/*
+** neg2:
+**	cmp	w0, 0
+**	csneg	w0, w2, w1, eq
+**	ret
+*/
+unsigned long long
+neg2(unsigned a, unsigned b, unsigned c)
+{
+  return a ? -b : c;
+}
+
+/* { dg-final { check-function-bodies "**" "" "" } } */