Hi,

I hadn't received any reply, so I implemented various ways to do this (about
8 of them, in fact).

The conclusion is that no, we cannot emit one big RTL for the final instruction
immediately.
The reason that all comparisons in the AArch64 backend expand to separate CC
compares and separate testing of the operands is ifcvt.

The separate CC compare is needed so that ifcvt can produce csel, cset, etc.
from the compares.  Unlike, say, combine, ifcvt cannot do recog on a parallel
with a clobber.  If we emitted the instruction directly, ifcvt would not be
able to, say, make a csel, because we have no patterns which handle
zero_extract and compare (unlike combine, ifcvt cannot transform the extract
into an AND).

While you could provide various patterns for this (and I did try), you end up
with broken patterns because you can't add the clobber of the CC register.  If
you do, ifcvt recog fails.

e.g.

int
f1 (int x)
{
  if (x & 1)
    return 1;
  return x;
}

We lose csel here.

Secondly, the reason the compare with an explicit CC mode is needed is so that
ifcvt can transform the operation into a version that doesn't require the flags
to be set.  But it only does so if it knows about the explicit usage of the CC
reg.

For instance,

int
foo (int a, int b)
{
  return ((a & (1 << 25)) ? 5 : 4);
}

doesn't require a comparison; the optimal form is:

foo(int, int):
        ubfx    x0, x0, 25, 1
        add     w0, w0, 4
        ret

and no compare is actually needed.  If you represent the instruction using an 
ANDS instead of a zero_extract
then you get close, but you end up with an ands followed by an add, which is a 
slower operation.

These two reasons are the main reasons why all comparisons in AArch64 expand
the way they do, so tbranch shouldn't do anything differently here.
Additionally, the reason for the optab was to pass range information to the
backend during expansion.

In this version, however, I have represented the expand using an ANDS instead.
This allows us not to regress on -O0 as the previous version did.

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Ok for master?

Note that this patch relies on 
https://patchwork.sourceware.org/project/gcc/patch/y1+4qitmrqhbd...@arm.com/ 
which has yet to be reviewed but which cleans up extensions so they can be used 
like this.

Thanks,
Tamar

gcc/ChangeLog:

        * config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
        (*tb<optab><ALLI:mode><GPI:mode>1): ... this.
        (tbranch_<code><mode>4): New.
        (zero_extend<SI_ONLY:mode><SD_HSDI:mode>2,
        zero_extend<HI_ONLY:mode><SD_HSDI:mode>2,
        zero_extend<QI_ONLY:mode><SD_HSDI:mode>2): Make dynamic calls with @.
        * config/aarch64/iterators.md (ZEROM, zerom): New.

gcc/testsuite/ChangeLog:

        * gcc.target/aarch64/tbz_1.c: New test.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
4c181a96e555c2a58c59fc991000b2a2fa9bd244..7ee1d01e050004e42cd2d0049f0200da71d918bb
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -946,12 +946,33 @@ (define_insn "*cb<optab><mode>1"
                      (const_int 1)))]
 )
 
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch_<code><mode>4"
   [(set (pc) (if_then_else
-             (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
-                                   (const_int 1)
-                                   (match_operand 1
-                                     "aarch64_simd_shift_imm_<mode>" "n"))
+              (EQL (match_operand:ALLI 0 "register_operand")
+                   (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
+              (label_ref (match_operand 2 ""))
+              (pc)))]
+  ""
+{
+  rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+  rtx reg = gen_reg_rtx (<ZEROM>mode);
+  if (<MODE>mode == <ZEROM>mode)
+    reg = operands[0];
+  else
+    emit_insn (gen_zero_extend2 (<MODE>mode, <ZEROM>mode, reg, operands[0]));
+  rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
+  emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+  operands[1] = const0_rtx;
+  operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+                                        operands[1]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+  [(set (pc) (if_then_else
+             (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" 
"r")
+                                    (const_int 1)
+                                    (match_operand 1
+                                      "aarch64_simd_shift_imm_<ALLI:mode>" 
"n"))
                   (const_int 0))
             (label_ref (match_operand 2 "" ""))
             (pc)))
@@ -962,15 +983,15 @@ (define_insn "*tb<optab><mode>1"
       {
        if (get_attr_far_branch (insn) == 1)
          return aarch64_gen_far_branch (operands, 2, "Ltb",
-                                        "<inv_tb>\\t%<w>0, %1, ");
+                                        "<inv_tb>\\t%<ALLI:w>0, %1, ");
        else
          {
            operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
-           return "tst\t%<w>0, %1\;<bcond>\t%l2";
+           return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
          }
       }
     else
-      return "<tbz>\t%<w>0, %1, %l2";
+      return "<tbz>\t%<ALLI:w>0, %1, %l2";
   }
   [(set_attr "type" "branch")
    (set (attr "length")
@@ -1962,7 +1983,7 @@ (define_insn "extend<ALLX:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp")]
 )
 
-(define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
          (match_operand:SI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1978,7 +1999,7 @@ (define_insn "zero_extend<SI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp,fp,fp,fp")]
 )
 
-(define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,w,r,w")
         (zero_extend:SD_HSDI
          (match_operand:HI_ONLY 1 "nonimmediate_operand" "r,m,r,m,w,w")))]
@@ -1994,7 +2015,7 @@ (define_insn "zero_extend<HI_ONLY:mode><SD_HSDI:mode>2"
    (set_attr "arch" "*,*,fp16,fp,fp,fp16")]
 )
 
-(define_insn "zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
+(define_insn "@zero_extend<QI_ONLY:mode><SD_HSDI:mode>2"
   [(set (match_operand:SD_HSDI 0 "register_operand" "=r,r,w,r,w")
         (zero_extend:SD_HSDI
          (match_operand:QI_ONLY 1 "nonimmediate_operand" "r,m,m,w,w")))]
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 
df72c079f218db9727a96924cab496e91ce6df59..816e44753fb9f6245f3abdb6d3e689a36986ac99
 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1107,6 +1107,8 @@ (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") 
(DI "d")])
 ;; Give the length suffix letter for a sign- or zero-extension.
 (define_mode_attr size [(QI "b") (HI "h") (SI "w")])
 (define_mode_attr sizel [(QI "b") (HI "h") (SI "")])
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
 
 ;; Give the number of bits in the mode
 (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c 
b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 
0000000000000000000000000000000000000000..39deb58e278e2180ab270b5a999cac62cb17c682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99  -fno-unwind-tables 
-fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+**     tbnz    w[0-9]+, #?0, .L([0-9]+)
+**     ret
+**     ...
+*/
+void g1(bool x)
+{
+  if (__builtin_expect (x, 0))
+    h ();
+}
+
+/*
+** g2:
+**     tbz     w[0-9]+, #?0, .L([0-9]+)
+**     b       h
+**     ...
+*/
+void g2(bool x)
+{
+  if (__builtin_expect (x, 1))
+    h ();
+}
+
+/*
+** g3_ge:
+**     tbnz    w[0-9]+, #?31, .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_ge(int x)
+{
+  if (__builtin_expect (x >= 0, 1))
+    h ();
+}
+
+/*
+** g3_gt:
+**     cmp     w[0-9]+, 0
+**     ble     .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_gt(int x)
+{
+  if (__builtin_expect (x > 0, 1))
+    h ();
+}
+
+/*
+** g3_lt:
+**     tbz     w[0-9]+, #?31, .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_lt(int x)
+{
+  if (__builtin_expect (x < 0, 1))
+    h ();
+}
+
+/*
+** g3_le:
+**     cmp     w[0-9]+, 0
+**     bgt     .L[0-9]+
+**     b       h
+**     ...
+*/
+void g3_le(int x)
+{
+  if (__builtin_expect (x <= 0, 1))
+    h ();
+}
+
+/*
+** g5:
+**     mov     w[0-9]+, 65279
+**     tst     w[0-9]+, w[0-9]+
+**     beq     .L[0-9]+
+**     b       h
+**     ...
+*/ 
+void g5(int x)
+{
+  if (__builtin_expect (x & 0xfeff, 1))
+    h ();
+} 

Attachment: rb16486.patch
Description: rb16486.patch

Reply via email to