https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115382
--- Comment #1 from Robin Dapp <rdapp at gcc dot gnu.org> ---
Would something like this work?  The testcase ran successfully with Intel's
SDE after this change (and under aarch64 qemu with SVE).

diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 028692614bb..f9bf6a45611 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -7215,7 +7215,21 @@ vectorize_fold_left_reduction (loop_vec_info loop_vinfo,
       tree len = NULL_TREE;
       tree bias = NULL_TREE;
       if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
-        mask = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num, vectype_in, i);
+        {
+          tree mask_loop = vect_get_loop_mask (loop_vinfo, gsi, masks,
+                                               vec_num, vectype_in, i);
+          if (is_cond_op)
+            {
+              /* Merge the loop mask and the cond_op mask.  */
+              mask = make_ssa_name (TREE_TYPE (mask_loop));
+              gassign *and_stmt = gimple_build_assign (mask, BIT_AND_EXPR,
+                                                       mask_loop,
+                                                       vec_opmask[i]);
+              gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
+            }
+          else
+            mask = mask_loop;
+        }
       else if (is_cond_op)
         mask = vec_opmask[i];
       if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
--
2.45.1
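For illustration, here is a minimal sketch of the kind of source loop this
change is about (my own example, not the PR's testcase; the function name is
made up): a conditional fold-left reduction, where the accumulation order must
be preserved and each lane's contribution is guarded by a predicate.

/* Hypothetical example, not the PR testcase.  FP addition is not
   reassociable by default, so the vectorizer must keep the in-order
   (fold-left) reduction.  Under a fully masked loop, a lane may only
   contribute to SUM if both the loop mask (the iteration is in bounds)
   and the cond_op mask (pred[i] != 0) are set; using either mask alone
   lets inactive lanes leak into the accumulator.  */
double
cond_fold_left_sum (double *restrict a, int *restrict pred, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    if (pred[i])
      sum += a[i];
  return sum;
}

When this loop is vectorized, the condition becomes the cond_op mask
(vec_opmask above) and, on a fully masked target, the loop control supplies a
second mask; the hunk ANDs the two so the fold-left accumulator only sees
lanes that are active in both.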