This patch fixes the following FAILs in RISC-V regression testing: FAIL: gcc.dg/vect/vect-gather-1.c -flto -ffat-lto-objects scan-tree-dump vect "Loop contains only SLP stmts" FAIL: gcc.dg/vect/vect-gather-1.c scan-tree-dump vect "Loop contains only SLP stmts" FAIL: gcc.dg/vect/vect-gather-3.c -flto -ffat-lto-objects scan-tree-dump vect "Loop contains only SLP stmts" FAIL: gcc.dg/vect/vect-gather-3.c scan-tree-dump vect "Loop contains only SLP stmts"
The root cause of these FAILs is that GCC SLP fails on MASK_LEN_GATHER_LOAD. For RVV, we build MASK_LEN_GATHER_LOAD with a dummy mask (-1) in tree-vect-patterns.cc when the situation is the same as for GATHER_LOAD (no conditional mask). So we make MASK_LEN_GATHER_LOAD leverage the flow of GATHER_LOAD if the mask argument is a dummy mask. gcc/ChangeLog: * tree-vect-slp.cc (vect_get_operand_map): Handle IFN_MASK_LEN_GATHER_LOAD with a dummy mask like IFN_GATHER_LOAD. (vect_build_slp_tree_1): Treat CFN_MASK_LEN_GATHER_LOAD as a load. (vect_build_slp_tree_2): Allow IFN_MASK_LEN_GATHER_LOAD in the load assertion. * tree-vect-stmts.cc (vectorizable_load): Skip the scalar mask check for internal functions that have a length operand. --- gcc/tree-vect-slp.cc | 18 ++++++++++++++++-- gcc/tree-vect-stmts.cc | 4 ++-- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc index fa098f9ff4e..712c04ec278 100644 --- a/gcc/tree-vect-slp.cc +++ b/gcc/tree-vect-slp.cc @@ -544,6 +544,17 @@ vect_get_operand_map (const gimple *stmt, unsigned char swap = 0) case IFN_MASK_GATHER_LOAD: return arg1_arg4_map; + case IFN_MASK_LEN_GATHER_LOAD: + /* In tree-vect-patterns.cc, we will have these 2 situations: + + - Unconditional gather load transforms + into MASK_LEN_GATHER_LOAD with dummy mask which is -1. + + - Conditional gather load transforms + into MASK_LEN_GATHER_LOAD with real conditional mask.*/ + return integer_minus_onep (gimple_call_arg (call, 4)) ? arg1_map + : nullptr; + case IFN_MASK_STORE: return arg3_arg2_map; @@ -1077,7 +1088,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (cfn == CFN_MASK_LOAD || cfn == CFN_GATHER_LOAD - || cfn == CFN_MASK_GATHER_LOAD) + || cfn == CFN_MASK_GATHER_LOAD + || cfn == CFN_MASK_LEN_GATHER_LOAD) ldst_p = true; else if (cfn == CFN_MASK_STORE) { @@ -1337,6 +1349,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)) && rhs_code != CFN_GATHER_LOAD && rhs_code != CFN_MASK_GATHER_LOAD + && rhs_code != CFN_MASK_LEN_GATHER_LOAD /* Not grouped loads are handled as externals for BB vectorization. 
For loop vectorization we can handle splats the same we handle single element interleaving. */ @@ -1837,7 +1850,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD) || gimple_call_internal_p (stmt, IFN_GATHER_LOAD) - || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD)); + || gimple_call_internal_p (stmt, IFN_MASK_GATHER_LOAD) + || gimple_call_internal_p (stmt, IFN_MASK_LEN_GATHER_LOAD)); else { *max_nunits = this_max_nunits; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index cd7c1090d88..263acf5d3cd 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -9575,9 +9575,9 @@ vectorizable_load (vec_info *vinfo, return false; mask_index = internal_fn_mask_index (ifn); - if (mask_index >= 0 && slp_node) + if (mask_index >= 0 && slp_node && internal_fn_len_index (ifn) < 0) mask_index = vect_slp_child_index_for_operand (call, mask_index); - if (mask_index >= 0 + if (mask_index >= 0 && internal_fn_len_index (ifn) < 0 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index, &mask, NULL, &mask_dt, &mask_vectype)) return false; -- 2.36.3