https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90323

--- Comment #9 from luoxhu at gcc dot gnu.org ---
Then we could optimize it in match.pd:

diff --git a/gcc/match.pd b/gcc/match.pd
index 036f92fa959..8944312c153 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3711,6 +3711,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (if (integer_all_onesp (@1) && integer_zerop (@2))
     @0))))

+#if GIMPLE
+(simplify
+ (bit_xor @0 (bit_and @2 (bit_xor @0 @1)))
+ (if (optimize_vectors_before_lowering_p () && types_match (@0, @1)
+      && types_match (@0, @2) && VECTOR_TYPE_P (TREE_TYPE (@0))
+      && VECTOR_TYPE_P (TREE_TYPE (@1)) && VECTOR_TYPE_P (TREE_TYPE (@2)))
+ (with { tree itype = truth_type_for (type); }
+ (vec_cond (convert:itype @2) @1 @0))))
+#endif

In pr90323.c.033t.forwprop1, it will be optimized to:

  <bb 2> :
  _1 = ~mask_3(D);
  l_5 = _1 & l_4(D);
  _2 = mask_3(D) & r_6(D);
  _8 = l_4(D) ^ r_6(D);
  _10 = mask_3(D) & _8;
  _11 = (vector(4) <signed-boolean:32>) mask_3(D);
  l_7 = VEC_COND_EXPR <_11, r_6(D), l_4(D)>;
  return l_7;

Then in pr90323.c.243t.isel:

  <bb 2> [local count: 1073741824]:
  _6 = (vector(4) <signed-boolean:32>) mask_1(D);
  l_4 = .VCOND_MASK (_6, r_3(D), l_2(D));
  return l_4;

Final ASM:

without_sel:
.LFB11:
        .cfi_startproc
        xxsel 34,34,35,36
        blr
        .long 0
        .byte 0,0,0,0,0,0,0,0
        .cfi_endproc
.LFE11:
        .size   without_sel,.-without_sel
        .align 2
        .p2align 4,,15
        .globl with_sel
        .type   with_sel, @function
with_sel:
.LFB12:
        .cfi_startproc
        xxsel 34,34,35,36
        blr


@segher, is this a reasonable fix?

Reply via email to