Re: [PATCH 04/10] vect: Ensure reduc_inputs always have vectype
Richard Biener writes: > On Thu, Jul 8, 2021 at 2:44 PM Richard Sandiford via Gcc-patches > wrote: >> >> Vector reduction accumulators can differ in signedness from the >> final scalar result. The conversions to handle that case were >> distributed through vect_create_epilog_for_reduction; this patch >> does the conversion up-front instead. > > But is that still correct? The conversions should be unsigned -> signed, > that is, we've performed the reduction in unsigned because we associated > the originally undefined overflow signed reduction. But the final > reduction of the vector lanes in the epilogue still needs to be done > unsigned. > > So it's just not obvious that the patch preserves this - if it does then > the patch is OK. We ended up covering most of this in the later 6/10 thread, but just to follow up here for the record, in case anyone looks at the list archives: In that scenario, the phis are created with the signed type and then (like you say) the reduction happens in the unsigned type. These conversions are from the signed type to the unsigned type ready for the reduction. All later code either performed the conversion itself or (in the case of some of the cond reductions) required the phi and reduction vectypes to be the same. I've pushed the series now -- thanks for the reviews. Richard
Re: [PATCH 04/10] vect: Ensure reduc_inputs always have vectype
On Thu, Jul 8, 2021 at 2:44 PM Richard Sandiford via Gcc-patches wrote: > > Vector reduction accumulators can differ in signedness from the > final scalar result. The conversions to handle that case were > distributed through vect_create_epilog_for_reduction; this patch > does the conversion up-front instead. But is that still correct? The conversions should be unsigned -> signed, that is, we've performed the reduction in unsigned because we associated the originally undefined overflow signed reduction. But the final reduction of the vector lanes in the epilogue still needs to be done unsigned. So it's just not obvious that the patch preserves this - if it does then the patch is OK. Richard. > gcc/ > * tree-vect-loop.c (vect_create_epilog_for_reduction): Convert > the phi results to vectype after creating them. Remove later > conversion code that thus becomes redundant. > --- > gcc/tree-vect-loop.c | 28 +++- > 1 file changed, 11 insertions(+), 17 deletions(-) > > diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c > index b7f73ca52c7..1bd9a6ea52c 100644 > --- a/gcc/tree-vect-loop.c > +++ b/gcc/tree-vect-loop.c > @@ -5214,9 +5214,11 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, >if (double_reduc) > loop = outer_loop; >exit_bb = single_exit (loop)->dest; > + exit_gsi = gsi_after_labels (exit_bb); >reduc_inputs.create (slp_node ? vec_num : ncopies); >for (unsigned i = 0; i < vec_num; i++) > { > + gimple_seq stmts = NULL; >if (slp_node) > def = vect_get_slp_vect_def (slp_node, i); >else > @@ -5228,12 +5230,12 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > if (j) > def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); > SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); > + new_def = gimple_convert (, vectype, new_def); > reduc_inputs.quick_push (new_def); > } > + gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); > } > > - exit_gsi = gsi_after_labels (exit_bb); > - >/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 > (i.e. when reduc_fn is not available) and in the final adjustment > code (if needed). Also get the original scalar reduction variable as > @@ -5277,17 +5279,14 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, >|| ncopies > 1) > { >gimple_seq stmts = NULL; > - tree first_vect = gimple_convert (, vectype, reduc_inputs[0]); > + tree single_input = reduc_inputs[0]; >for (k = 1; k < reduc_inputs.length (); k++) > -{ > - tree second_vect = gimple_convert (, vectype, > reduc_inputs[k]); > - first_vect = gimple_build (, code, vectype, > -first_vect, second_vect); > -} > + single_input = gimple_build (, code, vectype, > +single_input, reduc_inputs[k]); >gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); > >reduc_inputs.truncate (0); > - reduc_inputs.safe_push (first_vect); > + reduc_inputs.safe_push (single_input); > } > >if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION > @@ -5323,10 +5322,6 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, >/* Vector of {0, 0, 0,...}. */ >tree zero_vec = build_zero_cst (vectype); > > - gimple_seq stmts = NULL; > - reduc_inputs[0] = gimple_convert (, vectype, reduc_inputs[0]); > - gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); > - >/* Find maximum value from the vector of found indexes. */ >tree max_index = make_ssa_name (index_scalar_type); >gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, > @@ -5394,7 +5389,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > >/* Convert the reduced value back to the result type and set as the > result. */ > - stmts = NULL; > + gimple_seq stmts = NULL; >new_temp = gimple_build (, VIEW_CONVERT_EXPR, scalar_type, >data_reduc); >gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); > @@ -5412,7 +5407,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > val = data_reduc[i], idx_val = induction_index[i]; > return val; */ > > - tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); > + tree data_eltype = TREE_TYPE (vectype); >tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); >unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); >poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE > (induction_index)); > @@ -5488,8 +5483,7 @@ vect_create_epilog_for_reduction (loop_vec_info > loop_vinfo, > "Reduce using direct vector reduction.\n"); > >
[PATCH 04/10] vect: Ensure reduc_inputs always have vectype
Vector reduction accumulators can differ in signedness from the final scalar result. The conversions to handle that case were distributed through vect_create_epilog_for_reduction; this patch does the conversion up-front instead. gcc/ * tree-vect-loop.c (vect_create_epilog_for_reduction): Convert the phi results to vectype after creating them. Remove later conversion code that thus becomes redundant. --- gcc/tree-vect-loop.c | 28 +++- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c index b7f73ca52c7..1bd9a6ea52c 100644 --- a/gcc/tree-vect-loop.c +++ b/gcc/tree-vect-loop.c @@ -5214,9 +5214,11 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (double_reduc) loop = outer_loop; exit_bb = single_exit (loop)->dest; + exit_gsi = gsi_after_labels (exit_bb); reduc_inputs.create (slp_node ? vec_num : ncopies); for (unsigned i = 0; i < vec_num; i++) { + gimple_seq stmts = NULL; if (slp_node) def = vect_get_slp_vect_def (slp_node, i); else @@ -5228,12 +5230,12 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, if (j) def = gimple_get_lhs (STMT_VINFO_VEC_STMTS (rdef_info)[j]); SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); + new_def = gimple_convert (, vectype, new_def); reduc_inputs.quick_push (new_def); } + gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); } - exit_gsi = gsi_after_labels (exit_bb); - /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 (i.e. when reduc_fn is not available) and in the final adjustment code (if needed). Also get the original scalar reduction variable as @@ -5277,17 +5279,14 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, || ncopies > 1) { gimple_seq stmts = NULL; - tree first_vect = gimple_convert (, vectype, reduc_inputs[0]); + tree single_input = reduc_inputs[0]; for (k = 1; k < reduc_inputs.length (); k++) -{ - tree second_vect = gimple_convert (, vectype, reduc_inputs[k]); - first_vect = gimple_build (, code, vectype, -first_vect, second_vect); -} + single_input = gimple_build (, code, vectype, +single_input, reduc_inputs[k]); gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); reduc_inputs.truncate (0); - reduc_inputs.safe_push (first_vect); + reduc_inputs.safe_push (single_input); } if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION @@ -5323,10 +5322,6 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Vector of {0, 0, 0,...}. */ tree zero_vec = build_zero_cst (vectype); - gimple_seq stmts = NULL; - reduc_inputs[0] = gimple_convert (, vectype, reduc_inputs[0]); - gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); - /* Find maximum value from the vector of found indexes. */ tree max_index = make_ssa_name (index_scalar_type); gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, @@ -5394,7 +5389,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, /* Convert the reduced value back to the result type and set as the result. */ - stmts = NULL; + gimple_seq stmts = NULL; new_temp = gimple_build (, VIEW_CONVERT_EXPR, scalar_type, data_reduc); gsi_insert_seq_before (_gsi, stmts, GSI_SAME_STMT); @@ -5412,7 +5407,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, val = data_reduc[i], idx_val = induction_index[i]; return val; */ - tree data_eltype = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); + tree data_eltype = TREE_TYPE (vectype); tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); @@ -5488,8 +5483,7 @@ vect_create_epilog_for_reduction (loop_vec_info loop_vinfo, "Reduce using direct vector reduction.\n"); gimple_seq stmts = NULL; - reduc_inputs[0] = gimple_convert (, vectype, reduc_inputs[0]); - vec_elem_type = TREE_TYPE (TREE_TYPE (reduc_inputs[0])); + vec_elem_type = TREE_TYPE (vectype); new_temp = gimple_build (, as_combined_fn (reduc_fn), vec_elem_type, reduc_inputs[0]); new_temp = gimple_convert (, scalar_type, new_temp);