https://gcc.gnu.org/g:fd743aab3bc8c521e0e8cd1742a71c2d713e2c76
commit r16-5649-gfd743aab3bc8c521e0e8cd1742a71c2d713e2c76 Author: Richard Biener <[email protected]> Date: Thu Nov 27 10:56:43 2025 +0100 Fix OMP SIMD clone mask record/get again Post-checkin CI detected aarch64 fallout for the last change. AARCH64 has ABI twists that run into a case where an unmasked call when loop masked allows for a mask that has different shape than that of the return value, which in turn has different type than that of an actual argument. While we do not support a mismatch of call mask shape with the OMP SIMD ABI mask shape, when there's no call mask we have no such restriction. So the following fixes the record/get of a loop mask in the unmasked call case, also fixing a latent issue present before. In particular, do not record a random scalar operand as representing the mask. A testcase is in gcc.target/aarch64/vect-simd-clone-4.c. * tree-vect-stmts.cc (vectorizable_simd_clone_call): Fix recording of the mask type again. Adjust placing of mask arguments for non-masked calls. 
Diff: --- gcc/tree-vect-stmts.cc | 78 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 22 deletions(-) diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 476a6e570e82..de28316ddc66 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -4047,7 +4047,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { tree vec_dest; tree scalar_dest; - tree op; tree vec_oprnd0 = NULL_TREE; tree vectype; poly_uint64 nunits; @@ -4121,6 +4120,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { simd_call_arg_info thisarginfo; affine_iv iv; + tree op; thisarginfo.linear_step = 0; thisarginfo.align = 0; @@ -4435,9 +4435,39 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, case SIMD_CLONE_ARG_TYPE_MASK: if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)) - vect_record_loop_mask (loop_vinfo, - &LOOP_VINFO_MASKS (loop_vinfo), - ncopies_in, vectype, op); + { + if (masked_call_offset) + /* When there is an explicit mask we require the + number of elements to match up. */ + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), + ncopies_in, vectype, NULL_TREE); + else + { + /* When there is no explicit mask on the call we have + more relaxed requirements. 
*/ + tree masktype; + poly_uint64 callee_nelements; + if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) + { + callee_nelements + = exact_div (bestn->simdclone->simdlen, + bestn->simdclone->args[i].linear_step); + masktype = get_related_vectype_for_scalar_type + (vinfo->vector_mode, TREE_TYPE (vectype), + callee_nelements); + } + else + { + masktype = bestn->simdclone->args[i].vector_type; + callee_nelements = TYPE_VECTOR_SUBPARTS (masktype); + } + auto o = vector_unroll_factor (nunits, callee_nelements); + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), + ncopies * o, masktype, NULL_TREE); + } + } break; } } @@ -4499,7 +4529,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { unsigned int k, l, m, o; tree atype; - op = gimple_call_arg (stmt, i + masked_call_offset); + tree op = gimple_call_arg (stmt, i + masked_call_offset); switch (bestn->simdclone->args[i].arg_type) { case SIMD_CLONE_ARG_TYPE_VECTOR: @@ -4818,12 +4848,20 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, gcc_assert (bestn->simdclone->args[mask_i].arg_type == SIMD_CLONE_ARG_TYPE_MASK); - tree masktype = bestn->simdclone->args[mask_i].vector_type; + tree mask_argtype = bestn->simdclone->args[mask_i].vector_type; + tree mask_vectype; if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) - callee_nelements = exact_div (bestn->simdclone->simdlen, - bestn->simdclone->args[i].linear_step); + { + callee_nelements = exact_div (bestn->simdclone->simdlen, + bestn->simdclone->args[i].linear_step); + mask_vectype = get_related_vectype_for_scalar_type + (vinfo->vector_mode, TREE_TYPE (vectype), callee_nelements); + } else - callee_nelements = TYPE_VECTOR_SUBPARTS (masktype); + { + mask_vectype = mask_argtype; + callee_nelements = TYPE_VECTOR_SUBPARTS (mask_vectype); + } o = vector_unroll_factor (nunits, callee_nelements); for (m = j * o; m < (j + 1) * o; m++) { @@ -4831,10 +4869,11 @@ vectorizable_simd_clone_call (vec_info *vinfo, 
stmt_vec_info stmt_info, { vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo); mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks, - ncopies_in, vectype, j); + ncopies * o, mask_vectype, m); } else - mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype); + mask = vect_build_all_ones_mask (vinfo, stmt_info, + mask_argtype); gassign *new_stmt; if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode)) @@ -4852,23 +4891,18 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, mask); gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); /* Then zero-extend to the mask mode. */ - mask = fold_build1 (NOP_EXPR, masktype, + mask = fold_build1 (NOP_EXPR, mask_argtype, gimple_get_lhs (new_stmt)); } else if (bestn->simdclone->mask_mode == VOIDmode) - { - tree one = fold_convert (TREE_TYPE (masktype), - integer_one_node); - tree zero = fold_convert (TREE_TYPE (masktype), - integer_zero_node); - mask = build3 (VEC_COND_EXPR, masktype, mask, - build_vector_from_val (masktype, one), - build_vector_from_val (masktype, zero)); - } + mask = build3 (VEC_COND_EXPR, mask_argtype, mask, + build_one_cst (mask_argtype), + build_zero_cst (mask_argtype)); else gcc_unreachable (); - new_stmt = gimple_build_assign (make_ssa_name (masktype), mask); + new_stmt = gimple_build_assign (make_ssa_name (mask_argtype), + mask); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); mask = gimple_assign_lhs (new_stmt);
