https://gcc.gnu.org/g:a4716ece529dfd29d169ccc96979f7c747231f25

commit r16-6709-ga4716ece529dfd29d169ccc96979f7c747231f25
Author: Richard Biener <[email protected]>
Date:   Fri Jan 9 09:35:21 2026 +0100

    middle-end/123175 - fix parts of const VEC_PERM with relaxed input sizes
    
    The following fixes enough of const VEC_PERM folding and lowering
    to deal with the fallout for the two testcases from the PR.  We
    usually do not generate such problematic VEC_PERM expressions, but
    we have allowed those since GCC 14.  As can be seen we mishandle those,
    including failure to expand/lower them by zero-extending inputs (which is
    what __builtin_shufflevector does).
    
    I'm unsure as to what extent we get such permutes but Tamar indicates
    that aarch64 can handle those at least.
    
            PR middle-end/123175
            * match.pd (vec_perm @0 @1 @2): Fixup for inputs having a
            different number of elements than the result.
            * tree-vect-generic.cc (lower_vec_perm): Likewise.
    
            * gcc.dg/torture/pr123175-1.c: New testcase.
            * gcc.dg/torture/pr123175-2.c: Likewise.

Diff:
---
 gcc/match.pd                              | 10 ++++++----
 gcc/testsuite/gcc.dg/torture/pr123175-1.c | 31 +++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/torture/pr123175-2.c | 31 +++++++++++++++++++++++++++++++
 gcc/tree-vect-generic.cc                  | 13 +++++++------
 4 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 492d88514fce..08d0810e9865 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -11515,12 +11515,13 @@ and,
     {
       /* Create a vec_perm_indices for the integer vector.  */
       poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type);
+      poly_uint64 nelts_in = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0));
       bool single_arg = (op0 == op1);
-      vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
+      vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts_in);
     }
-    (if (sel.series_p (0, 1, 0, 1))
+    (if (known_eq (nelts, nelts_in) && sel.series_p (0, 1, 0, 1))
      { op0; }
-     (if (sel.series_p (0, 1, nelts, 1))
+     (if (sel.series_p (0, 1, nelts_in, 1))
       { op1; }
       (with
        {
@@ -11533,7 +11534,7 @@ and,
                 op0 = op1;
                 sel.rotate_inputs (1);
               }
-            else if (known_ge (poly_uint64 (sel[0]), nelts))
+            else if (known_ge (poly_uint64 (sel[0]), nelts_in))
               {
                 std::swap (op0, op1);
                 sel.rotate_inputs (1);
@@ -11568,6 +11569,7 @@ and,
              in that case.  But only if the vector mode is supported,
              otherwise this is invalid GIMPLE.  */
           if (op_mode != BLKmode
+              && known_eq (nelts, nelts_in)
               && (TREE_CODE (cop0) == VECTOR_CST
                   || TREE_CODE (cop0) == CONSTRUCTOR
                   || TREE_CODE (cop1) == VECTOR_CST
diff --git a/gcc/testsuite/gcc.dg/torture/pr123175-1.c b/gcc/testsuite/gcc.dg/torture/pr123175-1.c
new file mode 100644
index 000000000000..eea341ee2cbe
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr123175-1.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fgimple" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+typedef char v4qi __attribute__((vector_size(4)));
+
+v4si res;
+v2si a;
+v2si b;
+
+void __attribute__((noipa)) __GIMPLE() foo ()
+{
+  v2si a_;
+  v2si b_;
+  v4si res_;
+  a_ = a;
+  b_ = b;
+  res_ = __VEC_PERM (a_, b_, _Literal (v4qi) { 0, 1, 2, 3 });
+  res = res_;
+}
+
+int main()
+{
+  a = (v2si){ 4, 3 };
+  b = (v2si){ 2, 1 };
+  foo ();
+  if (res[0] != 4 || res[1] != 3 || res[2] != 2 || res[3] != 1)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr123175-2.c b/gcc/testsuite/gcc.dg/torture/pr123175-2.c
new file mode 100644
index 000000000000..32431bee914d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr123175-2.c
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-additional-options "-fgimple" } */
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef int v2si __attribute__((vector_size(8)));
+typedef char v4qi __attribute__((vector_size(4)));
+
+v4si res;
+v2si a;
+v2si b;
+
+void __attribute__((noipa)) __GIMPLE() foo ()
+{
+  v2si a_;
+  v2si b_;
+  v4si res_;
+  a_ = a;
+  b_ = b;
+  res_ = __VEC_PERM (a_, b_, _Literal (v4qi) { 0, 2, 2, 1 });
+  res = res_;
+}
+
+int main()
+{
+  a = (v2si){ 4, 3 };
+  b = (v2si){ 2, 1 };
+  foo ();
+  if (res[0] != 4 || res[1] != 2 || res[2] != 2 || res[3] != 3)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc
index 90b8f816e153..d34d2c5fa48d 100644
--- a/gcc/tree-vect-generic.cc
+++ b/gcc/tree-vect-generic.cc
@@ -1620,7 +1620,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
   tree mask_type = TREE_TYPE (mask);
   tree vect_elt_type = TREE_TYPE (vect_type);
   tree mask_elt_type = TREE_TYPE (mask_type);
-  unsigned HOST_WIDE_INT elements;
+  unsigned HOST_WIDE_INT elements, in_elements;
   vec<constructor_elt, va_gc> *v;
   tree constr, t, si, i_val;
   tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
@@ -1628,7 +1628,8 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
   location_t loc = gimple_location (gsi_stmt (*gsi));
   unsigned i;
 
-  if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements))
+  if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements)
+      || !TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&in_elements))
     return;
 
   if (TREE_CODE (mask) == SSA_NAME)
@@ -1644,7 +1645,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
   if (TREE_CODE (mask) == VECTOR_CST
       && tree_to_vec_perm_builder (&sel_int, mask))
     {
-      vec_perm_indices indices (sel_int, 2, elements);
+      vec_perm_indices indices (sel_int, 2, in_elements);
       machine_mode vmode = TYPE_MODE (vect_type);
       tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
       machine_mode lhs_mode = TYPE_MODE (lhs_type);
@@ -1729,10 +1730,10 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
          unsigned HOST_WIDE_INT index;
 
          index = TREE_INT_CST_LOW (i_val);
-         if (!tree_fits_uhwi_p (i_val) || index >= elements)
-           i_val = build_int_cst (mask_elt_type, index & (elements - 1));
+         if (!tree_fits_uhwi_p (i_val) || index >= in_elements)
+           i_val = build_int_cst (mask_elt_type, index & (in_elements - 1));
 
-          if (two_operand_p && (index & elements) != 0)
+         if (two_operand_p && (index & in_elements) != 0)
            t = vector_element (gsi, vec1, i_val, &vec1tmp);
          else
            t = vector_element (gsi, vec0, i_val, &vec0tmp);

Reply via email to