https://gcc.gnu.org/g:1c9d321611367608d6bc1d97cf35b4c1bcb4b2d1

commit r16-5585-g1c9d321611367608d6bc1d97cf35b4c1bcb4b2d1
Author: Tamar Christina <[email protected]>
Date:   Tue Nov 25 12:51:31 2025 +0000

    middle-end: support new {cond{_len}_}vec_cbranch_{any|all} optabs [PR118974]
    
    This patch introduces six new vector cbranch optabs:
    
    1. vec_cbranch_any and vec_cbranch_all.
    2. cond_vec_cbranch_any and cond_vec_cbranch_all.
    3. cond_len_vec_cbranch_any and cond_len_vec_cbranch_all.
    
    Today cbranch can be used for both vector and scalar modes.  In both these
    cases it's intended to compare boolean values, either scalar or vector.
    
    The optab documentation does not, however, state that it can only handle
    comparisons against 0.  So many targets have added code for the vector
    variant that tries to deal with the case where we branch based on two
    non-zero registers.
    
    However this code can't ever be reached, because the cbranch expansion
    only deals with comparisons against 0 for vectors.  This is because for
    vectors the rest of the compiler has no way to generate a non-zero
    comparison: e.g. the vectorizer will always generate a zero comparison,
    and the C/C++ front-ends won't allow vectors to be used in a cbranch as
    they expect a boolean value.  ISAs like SVE work around this by requiring
    you to use an SVE PTEST intrinsic, which results in a single scalar
    boolean value that represents the flag values.
    
    e.g. if (svptest_any (..))
    
    The natural question is why we do not then, at expand time, rewrite the
    comparison into a non-zero comparison if the target supports it.
    
    The reason is that we can't safely do so.  For an ANY comparison
    (e.g. a != b) this is trivial, but for an ALL comparison (e.g. a == b) we
    would have to both flip the branch and invert the value being compared,
    i.e. we would have to turn it into an a != b comparison.
    
    But in emit_cmp_and_jump_insns we can't flip the branches anymore,
    because they have already been lowered into a fall-through branch (PC)
    and a label, ready for use in an if_then_else RTL expression.
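
    Concretely, the ANY/ALL duality is just De Morgan: branching when
    all (a == b) holds is the same as not branching when any (a != b) holds.
    A minimal scalar model of this (an illustrative C++ sketch, not GCC
    internals; the helper names are hypothetical):

      #include <array>
      #include <cassert>
      #include <cstddef>

      template <std::size_t N>
      bool any_ne (const std::array<int, N> &a, const std::array<int, N> &b)
      {
        for (std::size_t i = 0; i < N; i++)
          if (a[i] != b[i])
            return true;
        return false;
      }

      template <std::size_t N>
      bool all_eq (const std::array<int, N> &a, const std::array<int, N> &b)
      {
        for (std::size_t i = 0; i < N; i++)
          if (a[i] != b[i])
            return false;
        return true;
      }

      int main ()
      {
        std::array<int, 4> a = { 1, 2, 3, 4 }, b = { 1, 2, 3, 5 };
        /* Rewriting the ALL test as an ANY test inverts the branch sense,
           which is exactly what can no longer be done once the fall-through
           (PC) and label targets are fixed.  */
        assert (all_eq (a, b) == !any_ne (a, b));
        return 0;
      }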
    
    Now why does any of this matter?  Well, there are three optimizations we
    want to be able to do.
    
    1. Adv. SIMD does not support a vector !=, as in there is no instruction
       for it.  For both integer and FP vectors we perform the comparisons as
       EQ and then invert the resulting mask.  Ideally we'd like to replace
       this with just an XOR and the appropriate branch (see the sketch after
       this list).
    
    2. When on an SVE enabled system we would like to use an SVE compare +
       branch for the Adv. SIMD sequence, which could happen due to cost
       modelling.  However we can only do so if we know that the values being
       compared against are the boolean masks.  This means we can't really
       use combine to do this, because combine would have to match the entire
       sequence including the vector comparisons: at the RTL level we've lost
       the information that VECTOR_BOOLEAN_P would have given us.  This
       sequence would also be too long for combine to match, since it would
       have to match the generated compare + branch sequence as well.  It
       also becomes a bit messy to match ANY and ALL sequences.
    
    3. For SVE systems we would like to avoid generating the PTEST operation
       whenever possible.  Because SVE vector integer comparisons already set
       the flags, we don't need the PTEST for an ANY or ALL check.
       Eliminating it in RTL is difficult, so the best approach is to not
       generate the PTEST at all when it is not needed.
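
    For (1), the XOR works because a lane of a ^ b is non-zero exactly when
    the two lanes differ, so reducing the XOR result and testing it against
    zero answers the ANY question without needing a vector != instruction.
    A scalar model (an illustrative C++ sketch, not the code GCC emits):

      #include <cassert>
      #include <cstdint>

      /* any (a != b) over 4 x 32-bit lanes, modelled without a vector !=:
         a ^ b has a non-zero lane iff the corresponding lanes differ.  */
      bool any_ne_via_xor (const uint32_t a[4], const uint32_t b[4])
      {
        uint64_t acc = 0;
        for (int i = 0; i < 4; i++)
          acc |= a[i] ^ b[i];  /* replaces the EQ compare + mask invert  */
        return acc != 0;       /* the vec_cbranch_any condition  */
      }

      int main ()
      {
        uint32_t a[4] = { 1, 2, 3, 4 };
        uint32_t b[4] = { 1, 2, 3, 4 };
        uint32_t c[4] = { 1, 2, 9, 4 };
        assert (!any_ne_via_xor (a, b));
        assert (any_ne_via_xor (a, c));
        return 0;
      }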
    
    To handle these three cases the new optabs are added, and the current
    cbranch is no longer required if the target does not need help in
    distinguishing between boolean vector and data vector operands.
    
    This difference is not important for correctness, but it is for
    optimization.  So I've chosen not to deprecate cbranch_optab but to make
    it completely optional.
    
    I'll try to explain why:
    
    An example is when unrolling is done on Adv. SIMD early-break loops.
    
    We generate
    
      vect__1.8_29 = MEM <vector(4) int> [(int *)_25];
      vect__1.9_31 = MEM <vector(4) int> [(int *)_25 + 16B];
      mask_patt_10.10_32 = vect__1.8_29 == { 124, 124, 124, 124 };
      mask_patt_10.10_33 = vect__1.9_31 == { 124, 124, 124, 124 };
      vexit_reduc_34 = .VEC_TRUNC_ADD_HIGH (mask_patt_10.10_33, mask_patt_10.10_32);
      if (vexit_reduc_34 != { 0, 0, 0, 0 })
        goto <bb 4>; [5.50%]
      else
        goto <bb 18>; [94.50%]
    
    And so the new optabs aren't immediately useful here, because the
    comparisons can't be done by the optab itself.
    
    As such vec_cbranch_any would be called with vexit_reduc_34 and
    { 0, 0, 0, 0 }; however, since this optab expects to perform the
    comparison itself, we end up with
    
            ldp     q30, q31, [x0], 32
            cmeq    v30.4s, v30.4s, v27.4s
            cmeq    v31.4s, v31.4s, v27.4s
            addhn   v31.4h, v31.4s, v30.4s
            cmtst   v31.4h, v31.4h, v31.4h
            fmov    x3, d31
            cbz     x3, .L2
    
    instead of
    
            ldp     q30, q31, [x0], 32
            cmeq    v30.4s, v30.4s, v27.4s
            cmeq    v31.4s, v31.4s, v27.4s
            addhn   v31.4h, v31.4s, v30.4s
            fmov    x3, d31
            cbz     x3, .L2
    
    because we don't know that the value is already a boolean -1/0 value.
    Without this knowledge we can't safely omit the compare.
    
    The conversion is needed because, e.g., it's not valid to drop the
    compare with zero when the vector just contains data:
    
    v30.8h = [ 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008 ]
    cmeq   v31.8h, v30.8h, #0        // -> v31.8h = [0,0,0,0,0,0,0,0]
    umaxp  v31.4s, v31.4s, v31.4s    // pairwise-OR over 0/FFFF masks -> still [0,0,0,0]
    fmov   x7, d31                   // x7 = 0
    cbnz   x7, .L6                   // NOT taken (correct: there were no zeros)
    
    vs
    
    umaxp v31.4s, v31.4s, v31.4s     // pairwise unsigned max:
                                     //   [ max(0x00020001,0x00040003)=0x00040003,
                                     //     max(0x00060005,0x00080007)=0x00080007, ... ]
    fmov  x7, d31                    // x7 = 0x0008000700040003  (non-zero)
    cbnz  x7, .L6                    // TAKEN
    
    As such, to avoid the extra compare on boolean vectors, we still need
    the cbranch_optab, or the new vec_cbranch_* optabs would need an extra
    operand to indicate what kind of data they hold.  Note that this isn't an
    issue for SVE, because SVE has BImode for booleans.
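
    A scalar model of that distinction (an illustrative C++ sketch; the
    helper names are hypothetical):

      #include <cassert>
      #include <cstdint>

      /* "Any lane equal to zero?" computed properly: elementwise compare
         first (the cmeq against #0 above), then reduce the 0/0xffff
         masks.  */
      bool any_zero_lane (const uint16_t lanes[8])
      {
        uint64_t acc = 0;
        for (int i = 0; i < 8; i++)
          acc |= (lanes[i] == 0 ? 0xffffu : 0u);
        return acc != 0;
      }

      /* The shortcut that drops the compare: reduce the raw lanes and test
         for non-zero.  Only valid when every lane is already 0 or -1.  */
      bool any_nonzero_bits (const uint16_t lanes[8])
      {
        uint64_t acc = 0;
        for (int i = 0; i < 8; i++)
          acc |= lanes[i];
        return acc != 0;
      }

      int main ()
      {
        uint16_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
        assert (!any_zero_lane (data));    /* correct: no zero lanes  */
        assert (any_nonzero_bits (data));  /* the shortcut would branch  */

        uint16_t mask[8] = { 0xffff, 0, 0, 0, 0, 0, 0, 0 };
        /* For a boolean mask "any bits set" and "any lane true" coincide,
           which is what makes dropping the compare safe.  */
        assert (any_nonzero_bits (mask));
        return 0;
      }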
    
    With these new optabs it's trivial to implement all the optimizations
    described above.
    
    I.e. with them we can now generate
    
    .L2:
            ldr     q31, [x1, x2]
            add     v29.4s, v29.4s, v25.4s
            add     v28.4s, v28.4s, v26.4s
            add     v31.4s, v31.4s, v30.4s
            str     q31, [x1, x2]
            add     x1, x1, 16
            cmp     x1, 2560
            beq     .L1
    .L6:
            ldr     q30, [x3, x1]
            cmpeq   p15.s, p7/z, z30.s, z27.s
            b.none  .L2
    
    and easily prove it correct.
    
    gcc/ChangeLog:
    
            PR target/118974
            * optabs.def (vec_cbranch_any_optab, vec_cbranch_all_optab,
            cond_vec_cbranch_any_optab, cond_vec_cbranch_all_optab,
            cond_len_vec_cbranch_any_optab, cond_len_vec_cbranch_all_optab): New.
            * doc/md.texi: Document them.
            * optabs.cc (prepare_cmp_insn): Refactor to take optab to check for
            instead of hardcoded cbranch and support mask and len.
            (emit_cmp_and_jump_insn_1, emit_cmp_and_jump_insns): Implement them.
            (emit_conditional_move, emit_conditional_add, gen_cond_trap): Update
            after changing function signatures to support new optabs.

Diff:
---
 gcc/doc/md.texi |  60 ++++++++++++++
 gcc/optabs.cc   | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 gcc/optabs.def  |   6 ++
 3 files changed, 289 insertions(+), 25 deletions(-)

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 6dedca225ae1..7bf2cc0aa1ba 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -7665,8 +7665,68 @@ position of Operand 1 to test.  Operand 3 is the @code{code_label} to jump to.
 Conditional branch instruction combined with a compare instruction.
 Operand 0 is a comparison operator.  Operand 1 and operand 2 are the
 first and second operands of the comparison, respectively.  Operand 3
+is the @code{code_label} to jump to.  For vectors this optab is only used for
+comparisons of VECTOR_BOOLEAN_TYPE_P values and it is never called for
+data-registers.  Data vector operands should use one of the patterns below
+instead.
+
+@cindex @code{vec_cbranch_any@var{mode}} instruction pattern
+@item @samp{vec_cbranch_any@var{mode}}
+Conditional branch instruction based on a vector compare that branches
+when at least one of the elementwise comparisons of the two input
+vectors is true.
+Operand 0 is a comparison operator.  Operand 1 and operand 2 are the
+first and second operands of the comparison, respectively.  Operand 3
 is the @code{code_label} to jump to.
 
+@cindex @code{vec_cbranch_all@var{mode}} instruction pattern
+@item @samp{vec_cbranch_all@var{mode}}
+Conditional branch instruction based on a vector compare that branches
+when all of the elementwise comparisons of the two input vectors are true.
+Operand 0 is a comparison operator.  Operand 1 and operand 2 are the
+first and second operands of the comparison, respectively.  Operand 3
+is the @code{code_label} to jump to.
+
+@cindex @code{cond_vec_cbranch_any@var{mode}} instruction pattern
+@item @samp{cond_vec_cbranch_any@var{mode}}
+Masked conditional branch instruction based on a vector compare that branches
+when at least one of the elementwise comparisons of the two input
+vectors is true.
+Operand 0 is a comparison operator.  Operand 1 is the mask operand.
+Operand 2 and operand 3 are the first and second operands of the comparison,
+respectively.  Operand 4 is the @code{code_label} to jump to.  Inactive lanes in
+the mask operand should not influence the decision to branch.
+
+@cindex @code{cond_vec_cbranch_all@var{mode}} instruction pattern
+@item @samp{cond_vec_cbranch_all@var{mode}}
+Masked conditional branch instruction based on a vector compare that branches
+when all of the elementwise comparisons of the two input vectors are true.
+Operand 0 is a comparison operator.  Operand 1 is the mask operand.
+Operand 2 and operand 3 are the first and second operands of the comparison,
+respectively.  Operand 4 is the @code{code_label} to jump to.  Inactive lanes in
+the mask operand should not influence the decision to branch.
+
+@cindex @code{cond_len_vec_cbranch_any@var{mode}} instruction pattern
+@item @samp{cond_len_vec_cbranch_any@var{mode}}
+Len based conditional branch instruction based on a vector compare that branches
+when at least one of the elementwise comparisons of the two input
+vectors is true.
+Operand 0 is a comparison operator.  Operand 1 is the mask operand.  Operand 2
+and operand 3 are the first and second operands of the comparison, respectively.
+Operand 4 is the len operand and Operand 5 is the bias operand.  Operand 6 is
+the @code{code_label} to jump to.  Inactive lanes in the mask operand should not
+influence the decision to branch.
+
+@cindex @code{cond_len_vec_cbranch_all@var{mode}} instruction pattern
+@item @samp{cond_len_vec_cbranch_all@var{mode}}
+Len based conditional branch instruction based on a vector compare that branches
+when all of the elementwise comparisons of the two input vectors are true.
+Operand 0 is a comparison operator.  Operand 1 is the mask operand.  Operand 2
+and operand 3 are the first and second operands of the comparison, respectively.
+Operand 4 is the len operand and Operand 5 is the bias operand.  Operand 6 is
+the @code{code_label} to jump to.  Inactive lanes in the mask operand should not
+influence the decision to branch.
+
 @cindex @code{jump} instruction pattern
 @item @samp{jump}
 A jump inside a function; an unconditional branch.  Operand 0 is the
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
index 0865fc2e19ae..10989a29c514 100644
--- a/gcc/optabs.cc
+++ b/gcc/optabs.cc
@@ -48,6 +48,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "langhooks.h"
 #include "gimple.h"
 #include "ssa.h"
+#include "tree-ssa-live.h"
+#include "tree-outof-ssa.h"
 
 static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
                                   machine_mode *);
@@ -4405,6 +4407,9 @@ can_vec_extract_var_idx_p (machine_mode vec_mode, machine_mode extr_mode)
 
    *PMODE is the mode of the inputs (in case they are const_int).
 
+   *OPTAB is the optab to check for OPTAB_DIRECT support.  Defaults to
+   cbranch_optab.
+
    This function performs all the setup necessary so that the caller only has
    to emit a single comparison insn.  This setup can involve doing a BLKmode
    comparison or emitting a library call to perform the comparison if no insn
@@ -4414,9 +4419,9 @@ can_vec_extract_var_idx_p (machine_mode vec_mode, machine_mode extr_mode)
    comparisons must have already been folded.  */
 
 static void
-prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
+prepare_cmp_insn (rtx x, rtx y, rtx *mask, enum rtx_code comparison, rtx size,
                  int unsignedp, enum optab_methods methods,
-                 rtx *ptest, machine_mode *pmode)
+                 rtx *ptest, machine_mode *pmode, optab optab)
 {
   machine_mode mode = *pmode;
   rtx libfunc, test;
@@ -4534,7 +4539,7 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
   FOR_EACH_WIDER_MODE_FROM (cmp_mode, mode)
     {
       enum insn_code icode;
-      icode = optab_handler (cbranch_optab, cmp_mode);
+      icode = optab_handler (optab, cmp_mode);
       if (icode != CODE_FOR_nothing
          && insn_operand_matches (icode, 0, test))
        {
@@ -4566,8 +4571,8 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
       /* Small trick if UNORDERED isn't implemented by the hardware.  */
       if (comparison == UNORDERED && rtx_equal_p (x, y))
        {
-         prepare_cmp_insn (x, y, UNLT, NULL_RTX, unsignedp, OPTAB_WIDEN,
-                           ptest, pmode);
+         prepare_cmp_insn (x, y, mask, UNLT, NULL_RTX, unsignedp, OPTAB_WIDEN,
+                           ptest, pmode, optab);
          if (*ptest)
            return;
        }
@@ -4618,8 +4623,8 @@ prepare_cmp_insn (rtx x, rtx y, enum rtx_code comparison, rtx size,
        }
 
       *pmode = ret_mode;
-      prepare_cmp_insn (x, y, comparison, NULL_RTX, unsignedp, methods,
-                       ptest, pmode);
+      prepare_cmp_insn (x, y, mask, comparison, NULL_RTX, unsignedp, methods,
+                       ptest, pmode, optab);
     }
 
   return;
@@ -4657,9 +4662,9 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
    we can do the branch.  */
 
 static void
-emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
-                         direct_optab cmp_optab, profile_probability prob,
-                         bool test_branch)
+emit_cmp_and_jump_insn_1 (rtx test, rtx cond, rtx len, rtx bias,
+                         machine_mode mode, rtx label, direct_optab cmp_optab,
+                         profile_probability prob, bool test_branch)
 {
   machine_mode optab_mode;
   enum mode_class mclass;
@@ -4672,9 +4677,21 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
 
   gcc_assert (icode != CODE_FOR_nothing);
   gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
+  gcc_assert (cond == NULL_RTX || (cond != NULL_RTX && !test_branch));
   if (test_branch)
     insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
                                            XEXP (test, 1), label));
+  else if (len)
+    {
+      gcc_assert (cond);
+      gcc_assert (bias);
+      insn = emit_jump_insn (GEN_FCN (icode) (test, cond, XEXP (test, 0),
+                                             XEXP (test, 1), len, bias,
+                                             label));
+    }
+  else if (cond)
+    insn = emit_jump_insn (GEN_FCN (icode) (test, cond, XEXP (test, 0),
+                                           XEXP (test, 1), label));
   else
     insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
                                            XEXP (test, 1), label));
@@ -4796,22 +4813,203 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
   if (unsignedp)
     comparison = unsigned_condition (comparison);
 
-  prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
-                   &test, &mode);
+  /* cbranch is no longer preferred for vectors, so when using a vector mode
+     check vec_cbranch variants instead.  */
+  if (!VECTOR_MODE_P (GET_MODE (op0)))
+    prepare_cmp_insn (op0, op1, NULL, comparison, size, unsignedp,
+                     OPTAB_LIB_WIDEN, &test, &mode, cbranch_optab);
 
   /* Check if we're comparing a truth type with 0, and if so check if
      the target supports tbranch.  */
   machine_mode tmode = mode;
   direct_optab optab;
-  if (op1 == CONST0_RTX (GET_MODE (op1))
-      && validate_test_and_branch (val, &test, &tmode,
-                                  &optab) != CODE_FOR_nothing)
+  if (op1 == CONST0_RTX (GET_MODE (op1)))
     {
-      emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
-      return;
+      if (!VECTOR_MODE_P (GET_MODE (op1))
+         && validate_test_and_branch (val, &test, &tmode,
+                                      &optab) != CODE_FOR_nothing)
+       {
+         emit_cmp_and_jump_insn_1 (test, NULL_RTX, NULL_RTX, NULL_RTX, tmode,
+                                   label, optab, prob, true);
+         return;
+       }
+
+      /* If we are comparing equality with 0, check if VAL is another equality
+        comparison and if the target supports it directly.  */
+      gimple *def_stmt = NULL;
+      if (val && TREE_CODE (val) == SSA_NAME
+         && VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (val))
+         && (comparison == NE || comparison == EQ)
+         && (def_stmt = get_gimple_for_ssa_name (val)))
+       {
+         tree masked_op = NULL_TREE;
+         tree len_op = NULL_TREE;
+         tree len_bias = NULL_TREE;
+         /* First determine if the operation should be masked or unmasked.  */
+         if (is_gimple_assign (def_stmt)
+             && gimple_assign_rhs_code (def_stmt) == BIT_AND_EXPR)
+           {
+             /* See if one side is a comparison, if so use the other side as
+                the mask.  */
+             gimple *mask_def = NULL;
+             tree rhs1 = gimple_assign_rhs1 (def_stmt);
+             tree rhs2 = gimple_assign_rhs2 (def_stmt);
+             if ((mask_def = get_gimple_for_ssa_name (rhs1))
+                 && is_gimple_assign (mask_def)
+                 && TREE_CODE_CLASS (gimple_assign_rhs_code (mask_def)) == tcc_comparison)
+               masked_op = rhs2;
+             else if ((mask_def = get_gimple_for_ssa_name (rhs2))
+                 && is_gimple_assign (mask_def)
+                 && TREE_CODE_CLASS (gimple_assign_rhs_code (mask_def)) == tcc_comparison)
+               masked_op = rhs1;
+
+             if (masked_op)
+               def_stmt = mask_def;
+           }
+         /* Else check to see if we're a LEN target.  */
+         else if (is_gimple_call (def_stmt)
+                  && gimple_call_internal_p (def_stmt)
+                  && gimple_call_internal_fn (def_stmt) == IFN_VCOND_MASK_LEN)
+           {
+             /* Example to consume:
+
+                  a = _59 != vect__4.17_75;
+                  vcmp = .VCOND_MASK_LEN (a, { -1, ... }, { 0, ... }, _90, 0);
+                  if (vcmp != { 0, ... })
+
+               and transform into
+
+                  if (cond_len_vec_cbranch_any ({-1, ...}, a, _90, 0)).  */
+             gcall *call = dyn_cast <gcall *> (def_stmt);
+             tree true_branch = gimple_call_arg (call, 1);
+             tree false_branch = gimple_call_arg (call, 2);
+             if (integer_minus_onep (true_branch)
+                 && integer_zerop (false_branch))
+               {
+                 len_op = gimple_call_arg (call, 3);
+                 len_bias = gimple_call_arg (call, 4);
+                 tree arg0 = gimple_call_arg (call, 0);
+
+                 def_stmt = get_gimple_for_ssa_name (arg0);
+               }
+           }
+
+         enum insn_code icode;
+         if (is_gimple_assign (def_stmt)
+             && TREE_CODE_CLASS (gimple_assign_rhs_code (def_stmt))
+                  == tcc_comparison)
+           {
+             class expand_operand ops[5];
+             rtx_insn *tmp = NULL;
+             start_sequence ();
+             rtx op0c = expand_normal (gimple_assign_rhs1 (def_stmt));
+             rtx op1c = expand_normal (gimple_assign_rhs2 (def_stmt));
+             machine_mode mode2 = GET_MODE (op0c);
+
+             int nops = masked_op ? 3 : (len_op ? 5 : 2);
+             int offset = masked_op || len_op ? 1 : 0;
+             create_input_operand (&ops[offset + 0], op0c, mode2);
+             create_input_operand (&ops[offset + 1], op1c, mode2);
+             if (masked_op)
+               {
+                 rtx mask_op = expand_normal (masked_op);
+                 auto mask_mode = GET_MODE (mask_op);
+                 create_input_operand (&ops[0], mask_op, mask_mode);
+               }
+             else if (len_op)
+               {
+                 rtx len_rtx = expand_normal (len_op);
+                 rtx len_bias_rtx = expand_normal (len_bias);
+                 tree lhs = gimple_get_lhs (def_stmt);
+                 auto mask_mode = TYPE_MODE (TREE_TYPE (lhs));
+                 create_input_operand (&ops[0], CONSTM1_RTX (mask_mode),
+                                       mask_mode);
+                 create_input_operand (&ops[3], len_rtx, GET_MODE (len_rtx));
+                 create_input_operand (&ops[4], len_bias_rtx,
+                                       GET_MODE (len_bias_rtx));
+               }
+
+             int unsignedp2 = TYPE_UNSIGNED (TREE_TYPE (val));
+             auto inner_code = gimple_assign_rhs_code (def_stmt);
+             rtx test2 = NULL_RTX;
+
+             enum rtx_code comparison2 = get_rtx_code (inner_code, unsignedp2);
+             if (unsignedp2)
+               comparison2 = unsigned_condition (comparison2);
+             if (comparison == NE)
+               optab = masked_op ? cond_vec_cbranch_any_optab
+                                 : len_op ? cond_len_vec_cbranch_any_optab
+                                          : vec_cbranch_any_optab;
+             else
+               optab = masked_op ? cond_vec_cbranch_all_optab
+                                 : len_op ? cond_len_vec_cbranch_all_optab
+                                          : vec_cbranch_all_optab;
+
+             if ((icode = optab_handler (optab, mode2))
+                 != CODE_FOR_nothing
+                 && maybe_legitimize_operands (icode, 1, nops, ops))
+               {
+                 test2 = gen_rtx_fmt_ee (comparison2, VOIDmode,
+                                         ops[offset + 0].value,
+                                         ops[offset + 1].value);
+                 if (insn_operand_matches (icode, 0, test2))
+                   {
+                     rtx mask
+                       = (masked_op || len_op) ? ops[0].value : NULL_RTX;
+                     rtx len = len_op ? ops[3].value : NULL_RTX;
+                     rtx bias = len_op ? ops[4].value : NULL_RTX;
+                     emit_cmp_and_jump_insn_1 (test2, mask, len, bias, mode2,
+                                               label, optab, prob, false);
+                     tmp = get_insns ();
+                   }
+               }
+
+             end_sequence ();
+             if (tmp)
+               {
+                 emit_insn (tmp);
+                 return;
+               }
+           }
+       }
+    }
+
+  /* cbranch should only be used for VECTOR_BOOLEAN_TYPE_P values.  */
+  direct_optab base_optab = cbranch_optab;
+  if (VECTOR_MODE_P (GET_MODE (op0)))
+    {
+      /* If cbranch is provided, use it.  If we get here it means we have an
+        instruction in between what created the boolean value and the gcond
+        that is not a masking operation.  This can happen for instance during
+        unrolling of early-break where we have an OR-reduction to reduce the
+        masks.  In this case knowing we have a mask can let us generate better
+        code.  If it's not there, then check the vector specific
+        optabs.  */
+      if (optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
+       {
+         if (comparison == NE)
+           base_optab = vec_cbranch_any_optab;
+         else
+           base_optab = vec_cbranch_all_optab;
+
+         prepare_cmp_insn (op0, op1, NULL, comparison, size, unsignedp,
+                           OPTAB_DIRECT, &test, &mode, base_optab);
+
+         enum insn_code icode = optab_handler (base_optab, mode);
+
+         /* If the new cbranch isn't supported, degrade back to the old one.  */
+         if (icode == CODE_FOR_nothing
+             || !test
+             || !insn_operand_matches (icode, 0, test))
+           base_optab = cbranch_optab;
+       }
+
+      prepare_cmp_insn (op0, op1, NULL, comparison, size, unsignedp,
+                       OPTAB_LIB_WIDEN, &test, &mode, base_optab);
     }
 
-  emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
+  emit_cmp_and_jump_insn_1 (test, NULL_RTX, NULL_RTX, NULL_RTX, mode, label,
+                           base_optab, prob, false);
 }
 
 /* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown.  */
@@ -5099,9 +5297,9 @@ emit_conditional_move (rtx target, struct rtx_comparison comp,
              else if (rtx_equal_p (orig_op1, op3))
                op3p = XEXP (comparison, 1) = force_reg (cmpmode, orig_op1);
            }
-         prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
+         prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1), NULL,
                            GET_CODE (comparison), NULL_RTX, unsignedp,
-                           OPTAB_WIDEN, &comparison, &cmpmode);
+                           OPTAB_WIDEN, &comparison, &cmpmode, cbranch_optab);
          if (comparison)
            {
               rtx res = emit_conditional_move_1 (target, comparison,
@@ -5316,9 +5514,9 @@ emit_conditional_add (rtx target, enum rtx_code code, rtx op0, rtx op1,
 
   do_pending_stack_adjust ();
   last = get_last_insn ();
-  prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1),
-                    GET_CODE (comparison), NULL_RTX, unsignedp, OPTAB_WIDEN,
-                    &comparison, &cmode);
+  prepare_cmp_insn (XEXP (comparison, 0), XEXP (comparison, 1), NULL,
+                   GET_CODE (comparison), NULL_RTX, unsignedp, OPTAB_WIDEN,
+                   &comparison, &cmode, cbranch_optab);
   if (comparison)
     {
       class expand_operand ops[4];
@@ -6132,8 +6330,8 @@ gen_cond_trap (enum rtx_code code, rtx op1, rtx op2, rtx tcode)
 
   do_pending_stack_adjust ();
   start_sequence ();
-  prepare_cmp_insn (op1, op2, code, NULL_RTX, false, OPTAB_DIRECT,
-                   &trap_rtx, &mode);
+  prepare_cmp_insn (op1, op2, NULL, code, NULL_RTX, false, OPTAB_DIRECT,
+                   &trap_rtx, &mode, cbranch_optab);
   if (!trap_rtx)
     insn = NULL;
   else
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 5218b6d6ec0a..7ed4327ec94a 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -269,6 +269,8 @@ OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
 OPTAB_D (cond_neg_optab, "cond_neg$a")
+OPTAB_D (cond_vec_cbranch_any_optab, "cond_vec_cbranch_any$a")
+OPTAB_D (cond_vec_cbranch_all_optab, "cond_vec_cbranch_all$a")
 OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a")
 OPTAB_D (cond_len_add_optab, "cond_len_add$a")
 OPTAB_D (cond_len_sub_optab, "cond_len_sub$a")
@@ -296,6 +298,8 @@ OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
 OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
 OPTAB_D (cond_len_neg_optab, "cond_len_neg$a")
 OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a")
+OPTAB_D (cond_len_vec_cbranch_any_optab, "cond_len_vec_cbranch_any$a")
+OPTAB_D (cond_len_vec_cbranch_all_optab, "cond_len_vec_cbranch_all$a")
 OPTAB_D (vcond_mask_len_optab, "vcond_mask_len_$a")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
@@ -428,6 +432,8 @@ OPTAB_D (smulhrs_optab, "smulhrs$a3")
 OPTAB_D (umulhs_optab, "umulhs$a3")
 OPTAB_D (umulhrs_optab, "umulhrs$a3")
 OPTAB_D (sdiv_pow2_optab, "sdiv_pow2$a3")
+OPTAB_D (vec_cbranch_any_optab, "vec_cbranch_any$a")
+OPTAB_D (vec_cbranch_all_optab, "vec_cbranch_all$a")
 OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
 OPTAB_D (vec_pack_ssat_optab, "vec_pack_ssat_$a")
 OPTAB_D (vec_pack_trunc_optab, "vec_pack_trunc_$a")
