https://gcc.gnu.org/g:fd743aab3bc8c521e0e8cd1742a71c2d713e2c76

commit r16-5649-gfd743aab3bc8c521e0e8cd1742a71c2d713e2c76
Author: Richard Biener <[email protected]>
Date:   Thu Nov 27 10:56:43 2025 +0100

    Fix OMP SIMD clone mask record/get again
    
    Post-checkin CI detected aarch64 fallout from the last change.  AArch64
    has ABI twists that lead to a case where an unmasked call in a masked
    loop allows for a mask that has a different shape than that of the
    return value, which in turn has a different type than that of an
    actual argument.
    
    While we do not support a mismatch of the call mask shape with the
    OMP SIMD ABI mask shape, when there's no call mask we have no such
    restriction.
    
    So the following fixes the record/get of a loop mask in the unmasked
    call case, also fixing a latent issue that was present before.  In
    particular, do not record a random scalar operand as representing
    the mask.
    
    A testcase is in gcc.target/aarch64/vect-simd-clone-4.c.
    
            * tree-vect-stmts.cc (vectorizable_simd_clone_call): Fix
            recording of the mask type again.  Adjust placing of
            mask arguments for non-masked calls.

Diff:
---
 gcc/tree-vect-stmts.cc | 78 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 476a6e570e82..de28316ddc66 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4047,7 +4047,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
 {
   tree vec_dest;
   tree scalar_dest;
-  tree op;
   tree vec_oprnd0 = NULL_TREE;
   tree vectype;
   poly_uint64 nunits;
@@ -4121,6 +4120,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
     {
       simd_call_arg_info thisarginfo;
       affine_iv iv;
+      tree op;
 
       thisarginfo.linear_step = 0;
       thisarginfo.align = 0;
@@ -4435,9 +4435,39 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            case SIMD_CLONE_ARG_TYPE_MASK:
              if (loop_vinfo
                  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-               vect_record_loop_mask (loop_vinfo,
-                                      &LOOP_VINFO_MASKS (loop_vinfo),
-                                      ncopies_in, vectype, op);
+               {
+                 if (masked_call_offset)
+                   /* When there is an explicit mask we require the
+                      number of elements to match up.  */
+                   vect_record_loop_mask (loop_vinfo,
+                                          &LOOP_VINFO_MASKS (loop_vinfo),
+                                          ncopies_in, vectype, NULL_TREE);
+                 else
+                   {
+                     /* When there is no explicit mask on the call we have
+                        more relaxed requirements.  */
+                     tree masktype;
+                     poly_uint64 callee_nelements;
+                     if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
+                       {
+                         callee_nelements
+                           = exact_div (bestn->simdclone->simdlen,
+                                        bestn->simdclone->args[i].linear_step);
+                         masktype = get_related_vectype_for_scalar_type
+                             (vinfo->vector_mode, TREE_TYPE (vectype),
+                              callee_nelements);
+                       }
+                     else
+                       {
+                         masktype = bestn->simdclone->args[i].vector_type;
+                         callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
+                       }
+                     auto o = vector_unroll_factor (nunits, callee_nelements);
+                     vect_record_loop_mask (loop_vinfo,
+                                            &LOOP_VINFO_MASKS (loop_vinfo),
+                                            ncopies  * o, masktype, NULL_TREE);
+                   }
+               }
              break;
            }
        }
@@ -4499,7 +4529,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
        {
          unsigned int k, l, m, o;
          tree atype;
-         op = gimple_call_arg (stmt, i + masked_call_offset);
+         tree op = gimple_call_arg (stmt, i + masked_call_offset);
          switch (bestn->simdclone->args[i].arg_type)
            {
            case SIMD_CLONE_ARG_TYPE_VECTOR:
@@ -4818,12 +4848,20 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
          gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
                      SIMD_CLONE_ARG_TYPE_MASK);
 
-         tree masktype = bestn->simdclone->args[mask_i].vector_type;
+         tree mask_argtype = bestn->simdclone->args[mask_i].vector_type;
+         tree mask_vectype;
          if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
-           callee_nelements = exact_div (bestn->simdclone->simdlen,
-                                         
bestn->simdclone->args[i].linear_step);
+           {
+             callee_nelements = exact_div (bestn->simdclone->simdlen,
+                                           
bestn->simdclone->args[i].linear_step);
+             mask_vectype = get_related_vectype_for_scalar_type
+                 (vinfo->vector_mode, TREE_TYPE (vectype), callee_nelements);
+           }
          else
-           callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
+           {
+             mask_vectype = mask_argtype;
+             callee_nelements = TYPE_VECTOR_SUBPARTS (mask_vectype);
+           }
          o = vector_unroll_factor (nunits, callee_nelements);
          for (m = j * o; m < (j + 1) * o; m++)
            {
@@ -4831,10 +4869,11 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                {
                  vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
                  mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
-                                            ncopies_in, vectype, j);
+                                            ncopies * o, mask_vectype, m);
                }
              else
-               mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
+               mask = vect_build_all_ones_mask (vinfo, stmt_info,
+                                                mask_argtype);
 
              gassign *new_stmt;
              if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
@@ -4852,23 +4891,18 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
                                             mask);
                  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
                  /* Then zero-extend to the mask mode.  */
-                 mask = fold_build1 (NOP_EXPR, masktype,
+                 mask = fold_build1 (NOP_EXPR, mask_argtype,
                                      gimple_get_lhs (new_stmt));
                }
              else if (bestn->simdclone->mask_mode == VOIDmode)
-               {
-                 tree one = fold_convert (TREE_TYPE (masktype),
-                                          integer_one_node);
-                 tree zero = fold_convert (TREE_TYPE (masktype),
-                                           integer_zero_node);
-                 mask = build3 (VEC_COND_EXPR, masktype, mask,
-                                build_vector_from_val (masktype, one),
-                                build_vector_from_val (masktype, zero));
-               }
+               mask = build3 (VEC_COND_EXPR, mask_argtype, mask,
+                              build_one_cst (mask_argtype),
+                              build_zero_cst (mask_argtype));
              else
                gcc_unreachable ();
 
-             new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
+             new_stmt = gimple_build_assign (make_ssa_name (mask_argtype),
+                                             mask);
              vect_finish_stmt_generation (vinfo, stmt_info,
                                           new_stmt, gsi);
              mask = gimple_assign_lhs (new_stmt);

Reply via email to