https://gcc.gnu.org/g:a4716ece529dfd29d169ccc96979f7c747231f25
commit r16-6709-ga4716ece529dfd29d169ccc96979f7c747231f25 Author: Richard Biener <[email protected]> Date: Fri Jan 9 09:35:21 2026 +0100 middle-end/123175 - fix parts of const VEC_PERM with relaxed input sizes The following fixes enough of const VEC_PERM folding and lowering to deal with the fallout for the two testcases from the PR. We usually do not generate such problematic VEC_PERM expressions, but we allow those since GCC 14. As can be seen we mishandle those, including failure to expand/lower them by zero-extending inputs (which is what __builtin_shufflevector does). I'm unsure as to what extent we get such permutes but Tamar indicates that aarch64 can handle those at least. PR middle-end/123175 * match.pd (vec_perm @0 @1 @2): Fixup for inputs having a different number of elements than the result. * tree-vect-generic.cc (lower_vec_perm): Likewise. * gcc.dg/torture/pr123175-1.c: New testcase. * gcc.dg/torture/pr123175-2.c: Likewise. Diff: --- gcc/match.pd | 10 ++++++---- gcc/testsuite/gcc.dg/torture/pr123175-1.c | 31 +++++++++++++++++++++++++++++++ gcc/testsuite/gcc.dg/torture/pr123175-2.c | 31 +++++++++++++++++++++++++++++++ gcc/tree-vect-generic.cc | 13 +++++++------ 4 files changed, 75 insertions(+), 10 deletions(-) diff --git a/gcc/match.pd b/gcc/match.pd index 492d88514fce..08d0810e9865 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -11515,12 +11515,13 @@ and, { /* Create a vec_perm_indices for the integer vector. */ poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (type); + poly_uint64 nelts_in = TYPE_VECTOR_SUBPARTS (TREE_TYPE (op0)); bool single_arg = (op0 == op1); - vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts); + vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts_in); } - (if (sel.series_p (0, 1, 0, 1)) + (if (known_eq (nelts, nelts_in) && sel.series_p (0, 1, 0, 1)) { op0; } - (if (sel.series_p (0, 1, nelts, 1)) + (if (sel.series_p (0, 1, nelts_in, 1)) { op1; } (with { @@ -11533,7 +11534,7 @@ and, op0 = op1; sel.rotate_inputs (1); } - else if (known_ge (poly_uint64 (sel[0]), nelts)) + else if (known_ge (poly_uint64 (sel[0]), nelts_in)) { std::swap (op0, op1); sel.rotate_inputs (1); @@ -11568,6 +11569,7 @@ and, in that case. But only if the vector mode is supported, otherwise this is invalid GIMPLE. */ if (op_mode != BLKmode + && known_eq (nelts, nelts_in) && (TREE_CODE (cop0) == VECTOR_CST || TREE_CODE (cop0) == CONSTRUCTOR || TREE_CODE (cop1) == VECTOR_CST diff --git a/gcc/testsuite/gcc.dg/torture/pr123175-1.c b/gcc/testsuite/gcc.dg/torture/pr123175-1.c new file mode 100644 index 000000000000..eea341ee2cbe --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr123175-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fgimple" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v2si __attribute__((vector_size(8))); +typedef char v4qi __attribute__((vector_size(4))); + +v4si res; +v2si a; +v2si b; + +void __attribute__((noipa)) __GIMPLE() foo () +{ + v2si a_; + v2si b_; + v4si res_; + a_ = a; + b_ = b; + res_ = __VEC_PERM (a_, b_, _Literal (v4qi) { 0, 1, 2, 3 }); + res = res_; +} + +int main() +{ + a = (v2si){ 4, 3 }; + b = (v2si){ 2, 1 }; + foo (); + if (res[0] != 4 || res[1] != 3 || res[2] != 2 || res[3] != 1) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.dg/torture/pr123175-2.c b/gcc/testsuite/gcc.dg/torture/pr123175-2.c new file mode 100644 index 000000000000..32431bee914d --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr123175-2.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-additional-options "-fgimple" } */ + +typedef int v4si __attribute__((vector_size(16))); +typedef int v2si __attribute__((vector_size(8))); +typedef char v4qi __attribute__((vector_size(4))); + +v4si res; +v2si a; +v2si b; + +void __attribute__((noipa)) __GIMPLE() foo () +{ + v2si a_; + v2si b_; + v4si res_; + a_ = a; + b_ = b; + res_ = __VEC_PERM (a_, b_, _Literal (v4qi) { 0, 2, 2, 1 }); + res = res_; +} + +int main() +{ + a = (v2si){ 4, 3 }; + b = (v2si){ 2, 1 }; + foo (); + if (res[0] != 4 || res[1] != 2 || res[2] != 2 || res[3] != 3) + __builtin_abort (); + return 0; +} diff --git a/gcc/tree-vect-generic.cc b/gcc/tree-vect-generic.cc index 90b8f816e153..d34d2c5fa48d 100644 --- a/gcc/tree-vect-generic.cc +++ b/gcc/tree-vect-generic.cc @@ -1620,7 +1620,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi) tree mask_type = TREE_TYPE (mask); tree vect_elt_type = TREE_TYPE (vect_type); tree mask_elt_type = TREE_TYPE (mask_type); - unsigned HOST_WIDE_INT elements; + unsigned HOST_WIDE_INT elements, in_elements; vec<constructor_elt, va_gc> *v; tree constr, t, si, i_val; tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE; @@ -1628,7 +1628,8 @@ lower_vec_perm (gimple_stmt_iterator *gsi) location_t loc = gimple_location (gsi_stmt (*gsi)); unsigned i; - if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements)) + if (!TYPE_VECTOR_SUBPARTS (res_vect_type).is_constant (&elements) + || !TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&in_elements)) return; if (TREE_CODE (mask) == SSA_NAME) @@ -1644,7 +1645,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi) if (TREE_CODE (mask) == VECTOR_CST && tree_to_vec_perm_builder (&sel_int, mask)) { - vec_perm_indices indices (sel_int, 2, elements); + vec_perm_indices indices (sel_int, 2, in_elements); machine_mode vmode = TYPE_MODE (vect_type); tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt)); machine_mode lhs_mode = TYPE_MODE (lhs_type); @@ -1729,10 +1730,10 @@ lower_vec_perm (gimple_stmt_iterator *gsi) unsigned HOST_WIDE_INT index; index = TREE_INT_CST_LOW (i_val); - if (!tree_fits_uhwi_p (i_val) || index >= elements) - i_val = build_int_cst (mask_elt_type, index & (elements - 1)); + if (!tree_fits_uhwi_p (i_val) || index >= in_elements) + i_val = build_int_cst (mask_elt_type, index & (in_elements - 1)); - if (two_operand_p && (index & elements) != 0) + if (two_operand_p && (index & in_elements) != 0) t = vector_element (gsi, vec1, i_val, &vec1tmp); else t = vector_element (gsi, vec0, i_val, &vec0tmp);
