Hi, When I added support for the multiply highpart instructions newly introduced in Power10, I noticed that the vectorizer currently doesn't try to vectorize the multiply highpart pattern. I hope this isn't intentional?
This patch extends the existing mulhs pattern handling to cover multiply highpart. An alternative would be to recognize the mul_highpart operation in a general place that applies to scalar code when the target supports the optab for the scalar operation; that approach is based on the assumption that a target which supports the vector version of multiply highpart should also have the scalar version. However, I noticed that the function can_mult_highpart_p can check/handle mult_highpart well even without mul_highpart optab support, so I think recognizing this pattern in the vectorizer is better. Is it on the right track? Bootstrapped & regtested on powerpc64le-linux-gnu P9, x86_64-redhat-linux and aarch64-linux-gnu. BR, Kewen ----- gcc/ChangeLog: * tree-vect-patterns.c (vect_recog_mulhs_pattern): Add support to recog normal multiply highpart.
--- gcc/tree-vect-patterns.c | 67 ++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 19 deletions(-) diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index b2e7fc2cc7a..9253c8088e9 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1896,8 +1896,15 @@ vect_recog_over_widening_pattern (vec_info *vinfo, 1) Multiply high with scaling TYPE res = ((TYPE) a * (TYPE) b) >> c; + Here, c is bitsize (TYPE) / 2 - 1. + 2) ... or also with rounding TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; + Here, d is bitsize (TYPE) / 2 - 2. + + 3) Normal multiply high + TYPE res = ((TYPE) a * (TYPE) b) >> e; + Here, e is bitsize (TYPE) / 2. where only the bottom half of res is used. */ @@ -1942,7 +1949,6 @@ vect_recog_mulhs_pattern (vec_info *vinfo, stmt_vec_info mulh_stmt_info; tree scale_term; internal_fn ifn; - unsigned int expect_offset; /* Check for the presence of the rounding term. */ if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) @@ -1991,25 +1997,37 @@ vect_recog_mulhs_pattern (vec_info *vinfo, /* Get the scaling term. */ scale_term = gimple_assign_rhs2 (plus_input_stmt); + /* Check that the scaling factor is correct. */ + if (TREE_CODE (scale_term) != INTEGER_CST) + return NULL; + + /* Check pattern 2). */ + if (wi::to_widest (scale_term) + target_precision + 2 + != TYPE_PRECISION (lhs_type)) + return NULL; - expect_offset = target_precision + 2; ifn = IFN_MULHRS; } else { mulh_stmt_info = rshift_input_stmt_info; scale_term = gimple_assign_rhs2 (last_stmt); + /* Check that the scaling factor is correct. */ + if (TREE_CODE (scale_term) != INTEGER_CST) + return NULL; - expect_offset = target_precision + 1; - ifn = IFN_MULHS; + /* Check for pattern 1). */ + if (wi::to_widest (scale_term) + target_precision + 1 + == TYPE_PRECISION (lhs_type)) + ifn = IFN_MULHS; + /* Check for pattern 3). 
*/ + else if (wi::to_widest (scale_term) + target_precision + == TYPE_PRECISION (lhs_type)) + ifn = IFN_LAST; + else + return NULL; } - /* Check that the scaling factor is correct. */ - if (TREE_CODE (scale_term) != INTEGER_CST - || wi::to_widest (scale_term) + expect_offset - != TYPE_PRECISION (lhs_type)) - return NULL; - /* Check whether the scaling input term can be seen as two widened inputs multiplied together. */ vect_unpromoted_value unprom_mult[2]; @@ -2029,9 +2047,14 @@ vect_recog_mulhs_pattern (vec_info *vinfo, /* Check for target support. */ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); - if (!new_vectype - || !direct_internal_fn_supported_p - (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) + if (!new_vectype) + return NULL; + if (ifn != IFN_LAST + && !direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) + return NULL; + else if (ifn == IFN_LAST + && !can_mult_highpart_p (TYPE_MODE (new_vectype), + TYPE_UNSIGNED (new_type))) return NULL; /* The IR requires a valid vector type for the cast result, even though @@ -2040,14 +2063,20 @@ vect_recog_mulhs_pattern (vec_info *vinfo, if (!*type_out) return NULL; - /* Generate the IFN_MULHRS call. */ + gimple *mulhrs_stmt; tree new_var = vect_recog_temp_ssa_var (new_type, NULL); tree new_ops[2]; - vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, - unprom_mult, new_vectype); - gcall *mulhrs_stmt - = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); - gimple_call_set_lhs (mulhrs_stmt, new_var); + vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, unprom_mult, + new_vectype); + if (ifn == IFN_LAST) + mulhrs_stmt = gimple_build_assign (new_var, MULT_HIGHPART_EXPR, new_ops[0], + new_ops[1]); + else + { + /* Generate the IFN_MULHRS call. 
*/ + mulhrs_stmt = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); + gimple_call_set_lhs (mulhrs_stmt, new_var); + } gimple_set_location (mulhrs_stmt, gimple_location (last_stmt)); if (dump_enabled_p ()) -- 2.17.1