https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115382

--- Comment #1 from Robin Dapp <rdapp at gcc dot gnu.org> ---
Would something like this work?  With that change the testcase ran successfully
under Intel's SDE (and under aarch64 qemu with SVE).

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 028692614bb..f9bf6a45611 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7215,7 +7215,21 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       tree len = NULL_TREE;
       tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-       mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+       {
+         tree mask_loop = vect_get_loop_mask (loop_vinfo, gsi, masks,
+                                              vec_num, vectype_in, i);
+         if (is_cond_op)
+           {
+             /* Merge the loop mask and the cond_op mask.  */
+             mask = make_ssa_name (TREE_TYPE (mask_loop));
+             gassign *and_stmt = gimple_build_assign (mask, BIT_AND_EXPR,
+                                                      mask_loop,
+                                                      vec_opmask[i]);
+             gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
+           }
+         else
+           mask = mask_loop;
+       }
       else if (is_cond_op)
        mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
-- 
2.45.1
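
For context, a minimal sketch (not the PR's testcase; the function name and
details are illustrative) of the kind of conditional in-order reduction this
path handles: without reassociation the sum is vectorized as a fold-left
reduction, and the if-condition turns the add into a COND_ADD.  With a
fully-masked loop both the loop mask and the condition mask must be honored,
which is what the BIT_AND_EXPR above arranges.

/* Illustrative conditional fold-left reduction (hypothetical example).
   The loop mask disables the tail lanes; the condition c[i] supplies the
   COND_ADD mask.  A lane should only contribute when both are active.  */
double
cond_sum (const double *a, const int *c, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    if (c[i])
      sum += a[i];
  return sum;
}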
