On 2021/4/30 14:32, Xionghu Luo wrote:
The vsel instruction is a bit-wise select instruction.  Using an
IF_THEN_ELSE to express it in RTL is wrong and leads to wrong code
being generated in the combine pass.  Per element selection is a
subset of per bit-wise selection,with the patch the pattern is
written using bit operations.  But there are 8 different patterns
to define "op0 := (op1 & ~op3) | (op2 & op3)":

(~op3&op1) | (op3&op2),
(~op3&op1) | (op2&op3),
(op3&op2) | (~op3&op1),
(op2&op3) | (~op3&op1),
(op1&~op3) | (op3&op2),
(op1&~op3) | (op2&op3),
(op3&op2) | (op1&~op3),
(op2&op3) | (op1&~op3),

Combine pass will swap (op1&~op3) to (~op3&op1) due to commutative
canonical, which could reduce it to the FIRST 4 patterns, but it won't
swap (op2&op3) | (~op3&op1) to (~op3&op1) | (op2&op3), so this patch
handles it with two patterns with different NOT op3 position and check
equality inside it.

Tested pass on Power8LE, any comments?

gcc/ChangeLog:

        * config/rs6000/altivec.md (*altivec_vsel<mode>): Change to ...
        (altivec_vsel<mode>): ... this and update define.
        (*altivec_vsel<mode>_uns): Delete.
        (altivec_vsel<mode>2): New define_insn.
        * config/rs6000/rs6000-call.c (altivec_expand_vec_sel_builtin):
        New.
        (altivec_expand_builtin): Call altivec_expand_vec_sel_builtin to
        expand vel_sel.
        * config/rs6000/rs6000.c (rs6000_emit_vector_cond_expr): Use
        bit-wise selection instead of per element.
        * config/rs6000/vector.md:
        * config/rs6000/vsx.md (*vsx_xxsel<mode>): Change to ...
        (vsx_xxsel<mode>): ... this and update define.
        (*vsx_xxsel<mode>_uns): Delete.
        (vsx_xxsel<mode>2): New define_insn.

gcc/testsuite/ChangeLog:

        * gcc.target/powerpc/pr94613.c: New test.
---
  gcc/config/rs6000/altivec.md               | 50 ++++++++-----
  gcc/config/rs6000/rs6000-call.c            | 81 +++++++++++++++++++---
  gcc/config/rs6000/rs6000.c                 | 19 +++--
  gcc/config/rs6000/vector.md                | 26 ++++---
  gcc/config/rs6000/vsx.md                   | 56 ++++++++++-----
  gcc/testsuite/gcc.target/powerpc/pr94613.c | 38 ++++++++++
  6 files changed, 201 insertions(+), 69 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr94613.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1351dafbc41..f874f975519 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -667,26 +667,44 @@ (define_insn "*altivec_gev4sf"
    "vcmpgefp %0,%1,%2"
    [(set_attr "type" "veccmp")])
-(define_insn "*altivec_vsel<mode>"
+(define_insn "altivec_vsel<mode>"
    [(set (match_operand:VM 0 "altivec_register_operand" "=v")
-       (if_then_else:VM
-        (ne:CC (match_operand:VM 1 "altivec_register_operand" "v")
-               (match_operand:VM 4 "zero_constant" ""))
-        (match_operand:VM 2 "altivec_register_operand" "v")
-        (match_operand:VM 3 "altivec_register_operand" "v")))]
-  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
-  "vsel %0,%3,%2,%1"
+       (ior:VM
+        (and:VM
+         (not:VM (match_operand:VM 3 "altivec_register_operand" "v"))
+         (match_operand:VM 1 "altivec_register_operand" "v"))
+        (and:VM
+         (match_operand:VM 2 "altivec_register_operand" "v")
+         (match_operand:VM 4 "altivec_register_operand" "v"))))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)
+  && (rtx_equal_p (operands[2], operands[3])
+  || rtx_equal_p (operands[4], operands[3]))"
+  {
+    if (rtx_equal_p (operands[2], operands[3]))
+      return "vsel %0,%1,%4,%3";
+    else
+      return "vsel %0,%1,%2,%3";
+  }
    [(set_attr "type" "vecmove")])
-(define_insn "*altivec_vsel<mode>_uns"
+(define_insn "altivec_vsel<mode>2"
    [(set (match_operand:VM 0 "altivec_register_operand" "=v")
-       (if_then_else:VM
-        (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v")
-                  (match_operand:VM 4 "zero_constant" ""))
-        (match_operand:VM 2 "altivec_register_operand" "v")
-        (match_operand:VM 3 "altivec_register_operand" "v")))]
-  "VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
-  "vsel %0,%3,%2,%1"
+       (ior:VM
+        (and:VM
+         (match_operand:VM 1 "altivec_register_operand" "v")
+         (match_operand:VM 2 "altivec_register_operand" "v"))
+        (and:VM
+         (not:VM (match_operand:VM 3 "altivec_register_operand" "v"))
+         (match_operand:VM 4 "altivec_register_operand" "v"))))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)
+  && (rtx_equal_p (operands[1], operands[3])
+  || rtx_equal_p (operands[2], operands[3]))"
+  {
+    if (rtx_equal_p (operands[1], operands[3]))
+      return "vsel %0,%4,%2,%3";
+    else
+      return "vsel %0,%4,%1,%3";
+ }
    [(set_attr "type" "vecmove")])
;; Fused multiply add.
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index f5676255387..d65bdc01055 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -3362,11 +3362,11 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
      RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 
RS6000_BTI_unsigned_V2DI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
      RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI_UNS,
      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI_UNS,
      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI_UNS,
      RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 
RS6000_BTI_unsigned_V2DI, RS6000_BTI_V2DI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_2DI,
      RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI,
@@ -3386,9 +3386,9 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
      RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
      RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 
RS6000_BTI_unsigned_V4SI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI_UNS,
      RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI_UNS,
      RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_4SI,
      RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 
RS6000_BTI_bool_V4SI },
@@ -3398,9 +3398,9 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
      RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
      RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 
RS6000_BTI_unsigned_V8HI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI_UNS,
      RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI_UNS,
      RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_8HI,
      RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V8HI, 
RS6000_BTI_bool_V8HI },
@@ -3410,9 +3410,9 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
      RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 
RS6000_BTI_bool_V16QI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
      RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 
RS6000_BTI_unsigned_V16QI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI_UNS,
      RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI },
-  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
+  { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI_UNS,
      RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
    { ALTIVEC_BUILTIN_VEC_SEL, ALTIVEC_BUILTIN_VSEL_16QI,
      RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_bool_V16QI, 
RS6000_BTI_bool_V16QI },
@@ -10996,6 +10996,46 @@ altivec_expand_vec_ext_builtin (tree exp, rtx target)
    return target;
  }
+/* Expand vec_sel builtin. */
+static rtx
+altivec_expand_vec_sel_builtin (enum insn_code icode, tree exp, rtx target)
+{
+  rtx op0, op1, op2, pat;
+  tree arg0, arg1, arg2;
+
+  arg0 = CALL_EXPR_ARG (exp, 0);
+  op0 = expand_normal (arg0);
+  arg1 = CALL_EXPR_ARG (exp, 1);
+  op1 = expand_normal (arg1);
+  arg2 = CALL_EXPR_ARG (exp, 2);
+  op2 = expand_normal (arg2);
+
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = insn_data[icode].operand[1].mode;
+  machine_mode mode1 = insn_data[icode].operand[2].mode;
+  machine_mode mode2 = insn_data[icode].operand[3].mode;
+
+  if (target == 0
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
+    op2 = copy_to_mode_reg (mode2, op2);
+
+  pat = GEN_FCN (icode) (target, op0, op1, op2, op2);
+  if (pat)
+    emit_insn (pat);
+  else
+    return NULL_RTX;
+
+  return target;
+}
+
  /* Expand the builtin in EXP and store the result in TARGET.  Store
     true in *EXPANDEDP if we found a builtin to expand.  */
  static rtx
@@ -11181,6 +11221,29 @@ altivec_expand_builtin (tree exp, rtx target, bool 
*expandedp)
        emit_insn (pat);
        return NULL_RTX;
+ case ALTIVEC_BUILTIN_VSEL_2DF:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv2df, exp,
+                                            target);
+    case ALTIVEC_BUILTIN_VSEL_2DI:
+    case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv2di, exp,
+                                            target);
+    case ALTIVEC_BUILTIN_VSEL_4SF:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv4sf, exp,
+                                            target);
+    case ALTIVEC_BUILTIN_VSEL_4SI:
+    case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv4si, exp,
+                                            target);
+    case ALTIVEC_BUILTIN_VSEL_8HI:
+    case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv8hi, exp,
+                                            target);
+    case ALTIVEC_BUILTIN_VSEL_16QI:
+    case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
+      return altivec_expand_vec_sel_builtin (CODE_FOR_altivec_vselv16qi, exp,
+                                            target);
+
      case ALTIVEC_BUILTIN_DSSALL:
        emit_insn (gen_altivec_dssall ());
        return NULL_RTX;
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index befab53031c..329c6e7e5fb 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -15566,9 +15566,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, 
rtx op_false,
    machine_mode dest_mode = GET_MODE (dest);
    machine_mode mask_mode = GET_MODE (cc_op0);
    enum rtx_code rcode = GET_CODE (cond);
-  machine_mode cc_mode = CCmode;
    rtx mask;
-  rtx cond2;
    bool invert_move = false;
if (VECTOR_UNIT_NONE_P (dest_mode))
@@ -15608,8 +15606,6 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, 
rtx op_false,
      case GEU:
      case LTU:
      case LEU:
-      /* Mark unsigned tests with CCUNSmode.  */
-      cc_mode = CCUNSmode;
/* Invert condition to avoid compound test if necessary. */
        if (rcode == GEU || rcode == LEU)
@@ -15629,6 +15625,9 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, 
rtx op_false,
    if (!mask)
      return 0;
+ if (mask_mode != dest_mode)
+      mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
+
    if (invert_move)
      std::swap (op_true, op_false);
@@ -15668,13 +15667,11 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
    if (!REG_P (op_false) && !SUBREG_P (op_false))
      op_false = force_reg (dest_mode, op_false);
- cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
-                         CONST0_RTX (dest_mode));
-  emit_insn (gen_rtx_SET (dest,
-                         gen_rtx_IF_THEN_ELSE (dest_mode,
-                                               cond2,
-                                               op_true,
-                                               op_false)));
+  rtx tmp = gen_rtx_IOR (dest_mode,
+                        gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
+                                     op_false),
+                        gen_rtx_AND (dest_mode, mask, op_true));
+  emit_insn (gen_rtx_SET (dest, tmp));
    return 1;
  }
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 3446b03d40d..85ffcf6a763 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -862,23 +862,21 @@ (define_insn_and_split "vector_<code><mode>"
  ;; which is in the reverse order that we want
  (define_expand "vector_select_<mode>"
    [(set (match_operand:VEC_L 0 "vlogical_operand")
-       (if_then_else:VEC_L
-        (ne:CC (match_operand:VEC_L 3 "vlogical_operand")
-               (match_dup 4))
-        (match_operand:VEC_L 2 "vlogical_operand")
-        (match_operand:VEC_L 1 "vlogical_operand")))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
-  "operands[4] = CONST0_RTX (<MODE>mode);")
+    (ior:VEC_L
+     (and:VEC_L (not:VEC_L (match_operand:VEC_L 3 "vlogical_operand"))
+      (match_operand:VEC_L 1 "vlogical_operand"))
+     (and:VEC_L (match_dup 3)
+     (match_operand:VEC_L 2 "vlogical_operand"))))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)")
(define_expand "vector_select_<mode>_uns"
    [(set (match_operand:VEC_L 0 "vlogical_operand")
-       (if_then_else:VEC_L
-        (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand")
-                  (match_dup 4))
-        (match_operand:VEC_L 2 "vlogical_operand")
-        (match_operand:VEC_L 1 "vlogical_operand")))]
-  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
-  "operands[4] = CONST0_RTX (<MODE>mode);")
+    (ior:VEC_L
+     (and:VEC_L (not:VEC_L (match_operand:VEC_L 3 "vlogical_operand"))
+      (match_operand:VEC_L 1 "vlogical_operand"))
+     (and:VEC_L (match_dup 3)
+     (match_operand:VEC_L 2 "vlogical_operand"))))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)")
;; Expansions that compare vectors producing a vector result and a predicate,
  ;; setting CR6 to indicate a combined status
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index bcb92be2f5c..e4817bd3060 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -2105,29 +2105,47 @@ (define_insn "*vsx_ge_<mode>_p"
    [(set_attr "type" "<VStype_simple>")])
;; Vector select
-(define_insn "*vsx_xxsel<mode>"
+(define_insn "vsx_xxsel<mode>"
    [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-       (if_then_else:VSX_L
-        (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
-               (match_operand:VSX_L 4 "zero_constant" ""))
-        (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
-        (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "xxsel %x0,%x3,%x2,%x1"
+    (ior:VSX_L
+     (and:VSX_L
+      (not:VSX_L (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa"))
+      (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa"))
+     (and:VSX_L
+      (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
+      (match_operand:VSX_L 4 "vsx_register_operand" "<VSr>,wa"))))]
+  "VECTOR_MEM_VSX_P (<MODE>mode)
+  && (rtx_equal_p (operands[2], operands[3])
+  || rtx_equal_p (operands[4], operands[3]))"
+  {
+    if (rtx_equal_p (operands[2], operands[3]))
+      return "xxsel %x0,%x1,%x4,%x3";
+    else
+      return "xxsel %x0,%x1,%x2,%x3";
+  }
    [(set_attr "type" "vecmove")
     (set_attr "isa" "<VSisa>")])
-(define_insn "*vsx_xxsel<mode>_uns"
-  [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
-       (if_then_else:VSX_L
-        (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
-                  (match_operand:VSX_L 4 "zero_constant" ""))
-        (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")
-        (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa")))]
-  "VECTOR_MEM_VSX_P (<MODE>mode)"
-  "xxsel %x0,%x3,%x2,%x1"
-  [(set_attr "type" "vecmove")
-   (set_attr "isa" "<VSisa>")])
+(define_insn "vsx_xxsel<mode>2"
+ [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+   (ior:VSX_L
+    (and:VSX_L
+     (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
+     (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa"))
+    (and:VSX_L
+     (not:VSX_L (match_operand:VSX_L 3 "vsx_register_operand" "<VSr>,wa"))
+     (match_operand:VSX_L 4 "vsx_register_operand" "<VSr>,wa"))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)
+  && (rtx_equal_p (operands[1], operands[3])
+  || rtx_equal_p (operands[2], operands[3]))"
+  {
+    if (rtx_equal_p (operands[1], operands[3]))
+      return "xxsel %x0,%x4,%x2,%x3";
+    else
+      return "xxsel %x0,%x4,%x1,%x3";
+ }
+ [(set_attr "type" "vecmove")
+ (set_attr "isa" "<VSisa>")])
;; Copy sign
  (define_insn "vsx_copysign<mode>3"
diff --git a/gcc/testsuite/gcc.target/powerpc/pr94613.c 
b/gcc/testsuite/gcc.target/powerpc/pr94613.c
new file mode 100644
index 00000000000..06699088f9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr94613.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-O3 -maltivec" } */
+
+#include <altivec.h>
+
+/* The initial implementation of vec_sel used an IF_THEN_ELSE rtx.
+   This did NOT match what the vsel instruction does.  vsel is a
+   bit-wise operation.  Using IF_THEN_ELSE made the + operation to be
+   simplified away in combine.  A plus operation affects other bits in
+   the same element. Hence per-element simplifications are wrong for
+   vsel.  */
+vector unsigned char __attribute__((noinline))
+foo (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+  return vec_sel (a + b, c, a);
+}
+
+vector unsigned char __attribute__((noinline))
+bar (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+  return vec_sel (a | b, c, a);
+}
+
+int
+main ()
+{
+  vector unsigned char v = (vector unsigned char){ 1 };
+
+  if (foo (v, v, v)[0] != 3)
+      __builtin_abort ();
+
+  if (bar (v, v, v)[0] != 1)
+    __builtin_abort ();
+
+  return 0;
+}
+


--
Thanks,
Xionghu

Reply via email to