This patch is my proposed fix for (the regression aspects of) PR123238,
a code quality regression on x86_64 triggered by the generation of
VCOND_MASK.  The regression is actually just bad luck.  From gimple,
VCOND_MASK(a==b,c,d) is equivalent to VCOND_MASK(a!=b,d,c), and which
form gets generated was previously arbitrary.  This is reasonable for
many (most?) targets, but on x86_64 there's an asymmetry: equality
can be performed in one instruction, but inequality requires three.

Teaching the middle-end's expand pass which form is preferred could
in theory be done with a new (very specific) target hook, which would
require documentation, but a more generic solution is for expand's
expand_vec_cond_mask_optab_fn to make use of rtx_costs, and reverse
the sense of VCOND_MASK if that would be an improvement.  This has
the convenient property that the default rtx_costs of all comparison
operators are the same, resulting in no change unless explicitly
specified by the backend.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?  The i386 bits and the middle-end
bits?


2026-02-11  Roger Sayle  <[email protected]>

gcc/ChangeLog
        PR target/123238
        * expr.cc (convert_tree_comp_to_rtx): Make global.
        * expr.h (convert_tree_comp_to_rtx): Prototype here.
        * internal-fn.cc (expand_vec_cond_mask_optab_fn): Use rtx_costs
        to determine whether swapping operands would result in better
        code.

        * config/i386/i386-expand.cc (ix86_expand_int_vec_cmp): On
        AVX512 targets use a ternlog instead of a comparison to negate
        the mask (requires one instruction instead of two).
        * config/i386/i386.cc (ix86_rtx_costs): Refactor code for UNSPEC.
        Provide costs for UNSPEC_BLENDV and UNSPEC_MOVMSK.  Provide
        costs for comparison operators of integer vector modes.

gcc/testsuite/ChangeLog
        PR target/123238
        * gcc.target/i386/pr123238.c: New test case.


Roger
--

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a82bb4399c9..366ad513da9 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -5282,11 +5282,17 @@ ix86_expand_int_vec_cmp (rtx operands[])
     return false;
 
   if (negate)
-    cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
-                                  CONST0_RTX (GET_MODE (cmp)),
-                                  NULL, NULL, &negate);
-
-  gcc_assert (!negate);
+    {
+      if (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (cmp)) >= 16)
+       cmp = gen_rtx_XOR (GET_MODE (cmp), cmp, CONSTM1_RTX (GET_MODE (cmp)));
+      else
+       {
+         cmp = ix86_expand_int_sse_cmp (operands[0], EQ, cmp,
+                                        CONST0_RTX (GET_MODE (cmp)),
+                                        NULL, NULL, &negate);
+         gcc_assert (!negate);
+       }
+    }
 
   if (operands[0] != cmp)
     emit_move_insn (operands[0], cmp);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 42ae9ccb051..e434ebb835e 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -23024,36 +23024,71 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
       return false;
 
     case UNSPEC:
-      if (XINT (x, 1) == UNSPEC_TP)
-       *total = 0;
-      else if (XINT (x, 1) == UNSPEC_VTERNLOG)
+      switch (XINT (x, 1))
        {
+       case UNSPEC_TP:
+         *total = 0;
+         break;
+
+       case UNSPEC_VTERNLOG:
          *total = cost->sse_op;
-         *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
-         *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
-         *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
+         if (!REG_P (XVECEXP (x, 0, 0)))
+           *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+         if (!REG_P (XVECEXP (x, 0, 1)))
+           *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+         if (!REG_P (XVECEXP (x, 0, 2)))
+           *total += rtx_cost (XVECEXP (x, 0, 2), mode, code, 2, speed);
          return true;
-       }
-      else if (XINT (x, 1) == UNSPEC_PTEST)
-       {
+
+       case UNSPEC_PTEST:
+         {
+           *total = cost->sse_op;
+           rtx test_op0 = XVECEXP (x, 0, 0);
+           if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
+             return false;
+           if (GET_CODE (test_op0) == AND)
+             {
+               rtx and_op0 = XEXP (test_op0, 0);
+               if (GET_CODE (and_op0) == NOT)
+                 and_op0 = XEXP (and_op0, 0);
+               *total += rtx_cost (and_op0, GET_MODE (and_op0),
+                                   AND, 0, speed)
+                         + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
+                                     AND, 1, speed);
+            }
+           else
+             *total = rtx_cost (test_op0, GET_MODE (test_op0),
+                                UNSPEC, 0, speed);
+         }
+         return true;
+
+       case UNSPEC_BLENDV:
          *total = cost->sse_op;
-         rtx test_op0 = XVECEXP (x, 0, 0);
-         if (!rtx_equal_p (test_op0, XVECEXP (x, 0, 1)))
-           return false;
-         if (GET_CODE (test_op0) == AND)
+         if (!REG_P (XVECEXP (x, 0, 0)))
+           *total += rtx_cost (XVECEXP (x, 0, 0), mode, code, 0, speed);
+         if (!REG_P (XVECEXP (x, 0, 1)))
+           *total += rtx_cost (XVECEXP (x, 0, 1), mode, code, 1, speed);
+         if (!REG_P (XVECEXP (x, 0, 2)))
            {
-             rtx and_op0 = XEXP (test_op0, 0);
-             if (GET_CODE (and_op0) == NOT)
-               and_op0 = XEXP (and_op0, 0);
-             *total += rtx_cost (and_op0, GET_MODE (and_op0),
-                                 AND, 0, speed)
-                       + rtx_cost (XEXP (test_op0, 1), GET_MODE (and_op0),
-                                   AND, 1, speed);
+             rtx cond = XVECEXP (x, 0, 2);
+             if ((GET_CODE (cond) == LT || GET_CODE (cond) == GT)
+                 && CONST_VECTOR_P (XEXP (cond, 1)))
+               {
+                 /* avx2_blendvpd256_gt and friends.  */
+                 if (!REG_P (XEXP (cond, 0)))
+                   *total += rtx_cost (XEXP (cond, 0), mode, code, 2, speed);
+               }
+             else
+               *total += rtx_cost (cond, mode, code, 2, speed);
            }
-         else
-           *total = rtx_cost (test_op0, GET_MODE (test_op0),
-                              UNSPEC, 0, speed);
          return true;
+
+       case UNSPEC_MOVMSK:
+         *total = cost->sse_op;
+         return true;
+
+       default:
+         break;
        }
       return false;
 
@@ -23270,6 +23305,70 @@ ix86_rtx_costs (rtx x, machine_mode mode, int 
outer_code_i, int opno,
        }
       return false;
 
+    case EQ:
+    case GT:
+    case GTU:
+    case LT:
+    case LTU:
+      if (TARGET_SSE2
+         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+         && GET_MODE_SIZE (mode) >= 8)
+       {
+         /* vpcmpeq */
+         *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (4);
+         if (!REG_P (XEXP (x, 0)))
+           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+         if (!REG_P (XEXP (x, 1)))
+           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+         return true;
+       }
+      if (TARGET_XOP
+         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+         && GET_MODE_SIZE (mode) <= 16)
+       {
+         /* vpcomeq */
+         *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+         if (!REG_P (XEXP (x, 0)))
+           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+         if (!REG_P (XEXP (x, 1)))
+           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+         return true;
+       }
+      return false;
+
+    case NE:
+    case GE:
+    case GEU:
+      if (TARGET_XOP
+         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+         && GET_MODE_SIZE (mode) <= 16)
+       {
+         /* vpcomneq */
+         *total = speed ? COSTS_N_INSNS (1) : COSTS_N_BYTES (6);
+         if (!REG_P (XEXP (x, 0)))
+           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+         if (!REG_P (XEXP (x, 1)))
+           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+         return true;
+       }
+      if (TARGET_SSE2
+         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+         && GET_MODE_SIZE (mode) >= 8)
+       {
+         if (TARGET_AVX512F && GET_MODE_SIZE (mode) >= 16)
+           /* vpcmpeq + vpternlog */
+           *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (11);
+         else
+           /* vpcmpeq + pxor + vpcmpeq */
+           *total = speed ? COSTS_N_INSNS (3) : COSTS_N_BYTES (12);
+         if (!REG_P (XEXP (x, 0)))
+           *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+         if (!REG_P (XEXP (x, 1)))
+           *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+         return true;
+       }
+      return false;
+
     default:
       return false;
     }
diff --git a/gcc/expr.cc b/gcc/expr.cc
index 4d1a6f3dd1c..203c116f2fa 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -9121,7 +9121,7 @@ highest_pow2_factor_for_target (const_tree target, 
const_tree exp)
 /* Convert the tree comparison code TCODE to the rtl one where the
    signedness is UNSIGNEDP.  */
 
-static enum rtx_code
+enum rtx_code
 convert_tree_comp_to_rtx (enum tree_code tcode, int unsignedp)
 {
   enum rtx_code code;
diff --git a/gcc/expr.h b/gcc/expr.h
index ddd47cb4ecc..1e89a142d8c 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -338,6 +338,7 @@ extern tree string_constant (tree, tree *, tree *, tree *);
    a constant.  */
 extern tree byte_representation (tree, tree *, tree *, tree *);
 
+extern enum rtx_code convert_tree_comp_to_rtx (enum tree_code, int);
 extern enum tree_code maybe_optimize_mod_cmp (enum tree_code, tree *, tree *);
 extern void maybe_optimize_sub_cmp_0 (enum tree_code, tree *, tree *);
 
diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc
index d879568c6e3..1d20da0e185 100644
--- a/gcc/internal-fn.cc
+++ b/gcc/internal-fn.cc
@@ -3229,6 +3229,58 @@ expand_vec_cond_mask_optab_fn (internal_fn, gcall *stmt, 
convert_optab optab)
 
   gcc_assert (icode != CODE_FOR_nothing);
 
+  /* Find the comparison generating the mask OP0.  */
+  tree cmp_op0 = NULL_TREE;
+  tree cmp_op1 = NULL_TREE;
+  enum tree_code cmp_code = TREE_CODE (op0);
+  if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+    {
+      cmp_op0 = TREE_OPERAND (op0, 0);
+      cmp_op1 = TREE_OPERAND (op0, 1);
+    }
+  else if (cmp_code == SSA_NAME)
+    {
+      gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
+      if (is_gimple_assign (def_stmt))
+       {
+         cmp_code = gimple_assign_rhs_code (def_stmt);
+         if (TREE_CODE_CLASS (cmp_code) == tcc_comparison)
+           {
+             cmp_op0 = gimple_assign_rhs1 (def_stmt);
+             cmp_op1 = gimple_assign_rhs2 (def_stmt);
+           }
+       }
+    }
+
+  /* Decide whether to invert comparison based on rtx_cost.  */
+  if (cmp_op0)
+    {
+      enum tree_code rev_code;
+      tree op_type = TREE_TYPE (cmp_op0);
+      int unsignedp = TYPE_UNSIGNED (op_type);
+      rev_code = invert_tree_comparison (cmp_code, HONOR_NANS (op_type));
+
+      if (rev_code != ERROR_MARK)
+       {
+         tree cmp_type = TREE_TYPE (op0);
+         machine_mode cmp_mode = TYPE_MODE (cmp_type);
+         machine_mode op_mode = TYPE_MODE (op_type);
+         bool speed_p = optimize_insn_for_speed_p ();
+         rtx reg = gen_raw_REG (op_mode, LAST_VIRTUAL_REGISTER + 1);
+         rtx veccmp = gen_rtx_fmt_ee (convert_tree_comp_to_rtx (cmp_code, 
unsignedp),
+                                      cmp_mode, reg, reg);
+         int old_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+         PUT_CODE (veccmp, convert_tree_comp_to_rtx (rev_code, unsignedp));
+         int new_cost = rtx_cost (veccmp, cmp_mode, SET, 0, speed_p);
+         if (new_cost < old_cost)
+           {
+             op0 = fold_build2_loc (EXPR_LOCATION (op0), rev_code,
+                                    cmp_type, cmp_op0, cmp_op1);
+             std::swap (op1, op2);
+           }
+       }
+    }
+
   mask = expand_normal (op0);
   rtx_op1 = expand_normal (op1);
   rtx_op2 = expand_normal (op2);
diff --git a/gcc/testsuite/gcc.target/i386/pr123238.c 
b/gcc/testsuite/gcc.target/i386/pr123238.c
new file mode 100644
index 00000000000..63906ae0fb4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr123238.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2" } */
+
+void f(char c[])
+{
+    for (int i = 0; i < 8; i++)
+        c[i] = c[i] ? 'a' : 'c';
+}
+
+/* { dg-final { scan-assembler-times "pcmpeqb" 1 } } */

Reply via email to