Hi,

The vectorizer expects the widening multiplication pattern to be:
  type a_t, b_t;
  TYPE a_T, b_T, prod_T;

  a_T = (TYPE) a_t;
  b_T = (TYPE) b_t;
  prod_T = a_T * b_T;

where type 'TYPE' is double the size of type 'type'. This works fine when
the types are signed. For unsigned types the code looks like:

  unsigned type a_t, b_t;
  unsigned TYPE u_prod_T;
  TYPE a_T, b_T, prod_T;

  a_T = (TYPE) a_t;
  b_T = (TYPE) b_t;
  prod_T = a_T * b_T;
  u_prod_T = (unsigned TYPE) prod_T;

i.e., the multiplication is done on the signed types, followed by a cast to
unsigned. This patch adds support for such patterns and generates
WIDEN_MULT_EXPR for the unsigned type.

Another previously unsupported case is multiplication by a constant (e.g.,
when b_T is a constant). This patch checks that the constant fits the
smaller type 'type' and recognizes such cases as widening multiplication.

Bootstrapped and tested on powerpc64-suse-linux. Tested the vectorization
testsuite on arm-linux-gnueabi. I'll commit the patch shortly if there are
no comments/objections.

Ira

ChangeLog:

	* tree-vectorizer.h (vect_recog_func_ptr): Make last argument to be
	a pointer.
	* tree-vect-patterns.c (vect_recog_widen_sum_pattern,
	vect_recog_widen_mult_pattern, vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern): Likewise.
	(vect_pattern_recog_1): Remove declaration.
	(widened_name_p): Remove declaration.  Add new argument to specify
	whether to check that both types are either signed or unsigned.
	(vect_recog_widen_mult_pattern): Update documentation.  Handle
	unsigned patterns and multiplication by constants.
	(vect_pattern_recog_1): Update vect_recog_func references.  Use
	statement information from the statement returned from pattern
	detection functions.
	(vect_pattern_recog): Update vect_recog_func reference.
	* tree-vect-stmts.c (vectorizable_type_promotion): For widening
	multiplication by a constant use the type of the other operand.

testsuite/ChangeLog:

	* lib/target-supports.exp
	(check_effective_target_vect_widen_mult_qi_to_hi): Add NEON as
	supporting target.
	(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
	(check_effective_target_vect_widen_mult_qi_to_hi_pattern): New.
	(check_effective_target_vect_widen_mult_hi_to_si_pattern): New.
	* gcc.dg/vect/vect-widen-mult-u8.c: Expect to be vectorized using
	widening multiplication on targets that support it.
	* gcc.dg/vect/vect-widen-mult-u16.c: Likewise.
	* gcc.dg/vect/vect-widen-mult-const-s16.c: New test.
	* gcc.dg/vect/vect-widen-mult-const-u16.c: New test.
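To make the two new cases concrete, here is a condensed, self-contained
version of the loops involved (my own sketch, mirroring the new and updated
tests in the patch below; function and variable names are illustrative
only). Compiling it with -O3 -fdump-tree-vect-details should show
"vect_recog_widen_mult_pattern: detected" for both loops on targets with
the pattern support:

  #include <stdio.h>

  #define N 64

  unsigned short X[N], Y[N];
  unsigned int uresult[N];
  short A[N];
  int cresult[N];

  /* Unsigned case: X[i] and Y[i] are promoted to signed int, multiplied,
     and the product is cast back to unsigned, i.e. the S5/S6 shape
     described above.  The recognizer can now emit WIDEN_MULT_EXPR for the
     unsigned types.  */
  void
  umult (void)
  {
    int i;
    for (i = 0; i < N; i++)
      uresult[i] = (unsigned int) (X[i] * Y[i]);
  }

  /* Multiplication by a constant: 2333 fits in 'short', so only one
     operand has a widening def-stmt, and the constant is treated as a
     'short' operand of the widening multiplication.  */
  void
  cmult (void)
  {
    int i;
    for (i = 0; i < N; i++)
      cresult[i] = A[i] * 2333;
  }

  int
  main (void)
  {
    int i;
    for (i = 0; i < N; i++)
      {
        X[i] = i;
        Y[i] = 64 - i;
        A[i] = i;
      }
    umult ();
    cmult ();
    printf ("%u %d\n", uresult[N - 1], cresult[N - 1]);
    return 0;
  }

Before the patch the first loop was not recognized as a widening
multiplication because of the casts to 'int' (see the comment removed from
vect-widen-mult-u16.c below), and the second case was unsupported; with the
patch both should map to the target's widening-multiply instructions (e.g.
vmull on NEON).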
Index: tree-vectorizer.h
===================================================================
--- tree-vectorizer.h	(revision 174475)
+++ tree-vectorizer.h	(working copy)
@@ -896,7 +896,7 @@ extern void vect_slp_transform_bb (basic_block);
 /* Pattern recognition functions.
    Additional pattern recognition functions can (and will) be added
    in the future.  */
-typedef gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
+typedef gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
 #define NUM_PATTERNS 4
 
 void vect_pattern_recog (loop_vec_info);
 
Index: tree-vect-patterns.c
===================================================================
--- tree-vect-patterns.c	(revision 174475)
+++ tree-vect-patterns.c	(working copy)
@@ -38,16 +38,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "recog.h"
 #include "diagnostic-core.h"
 
-/* Function prototypes */
-static void vect_pattern_recog_1
-  (gimple (* ) (gimple, tree *, tree *), gimple_stmt_iterator);
-static bool widened_name_p (tree, gimple, tree *, gimple *);
-
 /* Pattern recognition functions */
-static gimple vect_recog_widen_sum_pattern (gimple, tree *, tree *);
-static gimple vect_recog_widen_mult_pattern (gimple, tree *, tree *);
-static gimple vect_recog_dot_prod_pattern (gimple, tree *, tree *);
-static gimple vect_recog_pow_pattern (gimple, tree *, tree *);
+static gimple vect_recog_widen_sum_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_widen_mult_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_dot_prod_pattern (gimple *, tree *, tree *);
+static gimple vect_recog_pow_pattern (gimple *, tree *, tree *);
 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_widen_mult_pattern,
 	vect_recog_widen_sum_pattern,
@@ -61,10 +56,12 @@ static vect_recog_func_ptr vect_vect_recog_func_pt
    is a result of a type-promotion, such that:
      DEF_STMT: NAME = NOP (name0)
    where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
-*/
+   If CHECK_SIGN is TRUE, check that either both types are signed or both are
+   unsigned.  */
 
 static bool
-widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt)
+widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
+                bool check_sign)
 {
   tree dummy;
   gimple dummy_gimple;
@@ -98,7 +95,7 @@ static bool
   *half_type = TREE_TYPE (oprnd0);
 
   if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
-      || (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type))
+      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))
     return false;
 
@@ -168,12 +165,12 @@ vect_recog_temp_ssa_var (tree type, gimple stmt)
         inner-loop nested in an outer-loop that us being vectorized).
 */
 
 static gimple
-vect_recog_dot_prod_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_dot_prod_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
 {
   gimple stmt;
   tree oprnd0, oprnd1;
   tree oprnd00, oprnd01;
-  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
   tree type, half_type;
   gimple pattern_stmt;
   tree prod_type;
@@ -181,10 +178,10 @@ static gimple
   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
   tree var;
 
-  if (!is_gimple_assign (last_stmt))
+  if (!is_gimple_assign (*last_stmt))
     return NULL;
 
-  type = gimple_expr_type (last_stmt);
+  type = gimple_expr_type (*last_stmt);
 
   /* Look for the following pattern
           DX = (TYPE1) X;
@@ -210,7 +207,7 @@ static gimple
   /* Starting from LAST_STMT, follow the defs of its uses in search of the
      above pattern.  */
-  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+  if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
     return NULL;
 
   if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
@@ -231,14 +228,14 @@ static gimple
   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
     return NULL;
 
-  oprnd0 = gimple_assign_rhs1 (last_stmt);
-  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  oprnd0 = gimple_assign_rhs1 (*last_stmt);
+  oprnd1 = gimple_assign_rhs2 (*last_stmt);
   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
       || !types_compatible_p (TREE_TYPE (oprnd1), type))
     return NULL;
-  stmt = last_stmt;
+  stmt = *last_stmt;
 
-  if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt))
+  if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
     {
       stmt = def_stmt;
       oprnd0 = gimple_assign_rhs1 (stmt);
@@ -247,7 +244,7 @@ static gimple
       half_type = type;
     }
 
-  /* So far so good. Since last_stmt was detected as a (summation) reduction,
+  /* So far so good.  Since *last_stmt was detected as a (summation) reduction,
      we know that oprnd1 is the reduction variable (defined by a loop-header
      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
      Left to check that oprnd0 is defined by a (widen_)mult_expr  */
@@ -293,10 +290,10 @@ static gimple
   if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
       || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
     return NULL;
-  if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt))
+  if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))
     return NULL;
   oprnd00 = gimple_assign_rhs1 (def_stmt);
-  if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt))
+  if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))
     return NULL;
   oprnd01 = gimple_assign_rhs1 (def_stmt);
   if (!types_compatible_p (half_type0, half_type1))
@@ -322,7 +319,7 @@ static gimple
 
   /* We don't allow changing the order of the computation in the inner-loop
      when doing outer-loop vectorization.  */
-  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+  gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
 
   return pattern_stmt;
 }
@@ -342,24 +339,47 @@ static gimple
 
    where type 'TYPE' is at least double the size of type 'type'.
 
+   Also detect unsigned cases:
+
+     unsigned type a_t, b_t;
+     unsigned TYPE u_prod_T;
+     TYPE a_T, b_T, prod_T;
+
+     S1  a_t = ;
+     S2  b_t = ;
+     S3  a_T = (TYPE) a_t;
+     S4  b_T = (TYPE) b_t;
+     S5  prod_T = a_T * b_T;
+     S6  u_prod_T = (unsigned TYPE) prod_T;
+
+   and multiplication by constants:
+
+     type a_t;
+     TYPE a_T, prod_T;
+
+     S1  a_t = ;
+     S3  a_T = (TYPE) a_t;
+     S5  prod_T = a_T * CONST;
+
    Input:
 
-   * LAST_STMT: A stmt from which the pattern search begins. In the example,
-     when this function is called with S5, the pattern {S3,S4,S5} is be detected.
+   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
+     when this function is called with S5, the pattern {S3,S4,S5,(S6)} is
+     detected.
 
    Output:
 
   * TYPE_IN: The type of the input arguments to the pattern.
 
-   * TYPE_OUT: The type of the output  of this pattern.
+   * TYPE_OUT: The type of the output of this pattern.
 
   * Return value: A new stmt that will be used to replace the sequence of
-     stmts that constitute the pattern. In this case it will be:
+     stmts that constitute the pattern.  In this case it will be:
         WIDEN_MULT <a_t, b_t>
 */
 
 static gimple
-vect_recog_widen_mult_pattern (gimple last_stmt,
+vect_recog_widen_mult_pattern (gimple *last_stmt,
                                tree *type_in,
                                tree *type_out)
 {
@@ -367,40 +387,111 @@ static gimple
   tree oprnd0, oprnd1;
   tree type, half_type0, half_type1;
   gimple pattern_stmt;
-  tree vectype, vectype_out;
+  tree vectype, vectype_out = NULL_TREE;
   tree dummy;
   tree var;
   enum tree_code dummy_code;
   int dummy_int;
   VEC (tree, heap) *dummy_vec;
+  bool op0_ok, op1_ok;
 
-  if (!is_gimple_assign (last_stmt))
+  if (!is_gimple_assign (*last_stmt))
     return NULL;
 
-  type = gimple_expr_type (last_stmt);
+  type = gimple_expr_type (*last_stmt);
 
   /* Starting from LAST_STMT, follow the defs of its uses in search
      of the above pattern.  */
-  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+  if (gimple_assign_rhs_code (*last_stmt) != MULT_EXPR)
     return NULL;
 
-  oprnd0 = gimple_assign_rhs1 (last_stmt);
-  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  oprnd0 = gimple_assign_rhs1 (*last_stmt);
+  oprnd1 = gimple_assign_rhs2 (*last_stmt);
   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
       || !types_compatible_p (TREE_TYPE (oprnd1), type))
     return NULL;
 
-  /* Check argument 0 */
-  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
-    return NULL;
-  oprnd0 = gimple_assign_rhs1 (def_stmt0);
+  /* Check argument 0.  */
+  op0_ok = widened_name_p (oprnd0, *last_stmt, &half_type0, &def_stmt0, false);
+  /* Check argument 1.  */
+  op1_ok = widened_name_p (oprnd1, *last_stmt, &half_type1, &def_stmt1, false);
 
-  /* Check argument 1 */
-  if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
+  /* In case of multiplication by a constant one of the operands may not match
+     the pattern, but not both.  */
+  if (!op0_ok && !op1_ok)
     return NULL;
-  oprnd1 = gimple_assign_rhs1 (def_stmt1);
 
+  if (op0_ok && op1_ok)
+    {
+      oprnd0 = gimple_assign_rhs1 (def_stmt0);
+      oprnd1 = gimple_assign_rhs1 (def_stmt1);
+    }
+  else if (!op0_ok)
+    {
+      if (CONSTANT_CLASS_P (oprnd0)
+          && TREE_CODE (half_type1) == INTEGER_TYPE
+          && tree_int_cst_lt (oprnd0, TYPE_MAXVAL (half_type1))
+          && tree_int_cst_lt (TYPE_MINVAL (half_type1), oprnd0))
+        {
+          /* OPRND0 is a constant of HALF_TYPE1.  */
+          half_type0 = half_type1;
+          oprnd1 = gimple_assign_rhs1 (def_stmt1);
+        }
+      else
+        return NULL;
+    }
+  else if (!op1_ok)
+    {
+      if (CONSTANT_CLASS_P (oprnd1)
+          && TREE_CODE (half_type0) == INTEGER_TYPE
+          && tree_int_cst_lt (oprnd1, TYPE_MAXVAL (half_type0))
+          && tree_int_cst_lt (TYPE_MINVAL (half_type0), oprnd1))
+        {
+          /* OPRND1 is a constant of HALF_TYPE0.  */
+          half_type1 = half_type0;
+          oprnd0 = gimple_assign_rhs1 (def_stmt0);
+        }
+      else
+        return NULL;
+    }
+
+  /* Handle unsigned case.  Look for
+     S6  u_prod_T = (unsigned TYPE) prod_T;
+     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
+  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
+    {
+      tree lhs = gimple_assign_lhs (*last_stmt), use_lhs;
+      imm_use_iterator imm_iter;
+      use_operand_p use_p;
+      int nuses = 0;
+      gimple use_stmt = NULL;
+      tree use_type;
+
+      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
+        return NULL;
+
+      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
+        {
+          use_stmt = USE_STMT (use_p);
+          nuses++;
+        }
+
+      if (nuses != 1 || !is_gimple_assign (use_stmt)
+          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
+        return NULL;
+
+      use_lhs = gimple_assign_lhs (use_stmt);
+      use_type = TREE_TYPE (use_lhs);
+      if (!INTEGRAL_TYPE_P (use_type)
+          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
+          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
+        return NULL;
+
+      type = use_type;
+      *last_stmt = use_stmt;
+    }
+
   if (!types_compatible_p (half_type0, half_type1))
     return NULL;
 
@@ -413,7 +504,7 @@ static gimple
   vectype_out = get_vectype_for_scalar_type (type);
   if (!vectype
       || !vectype_out
-      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
+      || !supportable_widening_operation (WIDEN_MULT_EXPR, *last_stmt,
                                           vectype_out, vectype,
                                           &dummy, &dummy, &dummy_code,
                                           &dummy_code, &dummy_int, &dummy_vec))
@@ -462,16 +553,16 @@ static gimple
  */
 
 static gimple
-vect_recog_pow_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_pow_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
 {
   tree fn, base, exp = NULL;
   gimple stmt;
   tree var;
 
-  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
+  if (!is_gimple_call (*last_stmt) || gimple_call_lhs (*last_stmt) == NULL)
     return NULL;
 
-  fn = gimple_call_fndecl (last_stmt);
+  fn = gimple_call_fndecl (*last_stmt);
   if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
     return NULL;
 
@@ -481,8 +572,8 @@ static gimple
     case BUILT_IN_POWI:
     case BUILT_IN_POWF:
     case BUILT_IN_POW:
-      base = gimple_call_arg (last_stmt, 0);
-      exp = gimple_call_arg (last_stmt, 1);
+      base = gimple_call_arg (*last_stmt, 0);
+      exp = gimple_call_arg (*last_stmt, 1);
       if (TREE_CODE (exp) != REAL_CST
           && TREE_CODE (exp) != INTEGER_CST)
         return NULL;
 
@@ -574,21 +665,21 @@ static gimple
         inner-loop nested in an outer-loop that us being vectorized).  */
 
 static gimple
-vect_recog_widen_sum_pattern (gimple last_stmt, tree *type_in, tree *type_out)
+vect_recog_widen_sum_pattern (gimple *last_stmt, tree *type_in, tree *type_out)
 {
   gimple stmt;
   tree oprnd0, oprnd1;
-  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (*last_stmt);
   tree type, half_type;
   gimple pattern_stmt;
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_info);
   tree var;
 
-  if (!is_gimple_assign (last_stmt))
+  if (!is_gimple_assign (*last_stmt))
     return NULL;
 
-  type = gimple_expr_type (last_stmt);
+  type = gimple_expr_type (*last_stmt);
 
   /* Look for the following pattern
           DX = (TYPE) X;
@@ -600,25 +691,25 @@ static gimple
   /* Starting from LAST_STMT, follow the defs of its uses in search of the
      above pattern.
 */
-  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+  if (gimple_assign_rhs_code (*last_stmt) != PLUS_EXPR)
     return NULL;
 
   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
     return NULL;
 
-  oprnd0 = gimple_assign_rhs1 (last_stmt);
-  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  oprnd0 = gimple_assign_rhs1 (*last_stmt);
+  oprnd1 = gimple_assign_rhs2 (*last_stmt);
   if (!types_compatible_p (TREE_TYPE (oprnd0), type)
       || !types_compatible_p (TREE_TYPE (oprnd1), type))
     return NULL;
 
-  /* So far so good. Since last_stmt was detected as a (summation) reduction,
+  /* So far so good.  Since *last_stmt was detected as a (summation) reduction,
      we know that oprnd1 is the reduction variable (defined by a loop-header
      phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
      Left to check that oprnd0 is defined by a cast from type 'type' to type
      'TYPE'.  */
 
-  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt))
+  if (!widened_name_p (oprnd0, *last_stmt, &half_type, &stmt, true))
     return NULL;
 
   oprnd0 = gimple_assign_rhs1 (stmt);
@@ -639,7 +730,7 @@ static gimple
 
   /* We don't allow changing the order of the computation in the inner-loop
      when doing outer-loop vectorization.  */
-  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+  gcc_assert (!nested_in_vect_loop_p (loop, *last_stmt));
 
   return pattern_stmt;
 }
@@ -669,23 +760,27 @@ static gimple
 
 static void
 vect_pattern_recog_1 (
-	gimple (* vect_recog_func) (gimple, tree *, tree *),
+	gimple (* vect_recog_func) (gimple *, tree *, tree *),
	gimple_stmt_iterator si)
 {
   gimple stmt = gsi_stmt (si), pattern_stmt;
-  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  stmt_vec_info stmt_info;
   stmt_vec_info pattern_stmt_info;
-  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+  loop_vec_info loop_vinfo;
   tree pattern_vectype;
   tree type_in, type_out;
   enum tree_code code;
   int i;
   gimple next;
 
-  pattern_stmt = (* vect_recog_func) (stmt, &type_in, &type_out);
+  pattern_stmt = (* vect_recog_func) (&stmt, &type_in, &type_out);
   if (!pattern_stmt)
     return;
 
+  si = gsi_for_stmt (stmt);
+  stmt_info = vinfo_for_stmt (stmt);
+  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+
   if (VECTOR_MODE_P (TYPE_MODE (type_in)))
     {
       /* No need to check target support (already checked by the pattern
@@ -832,7 +927,7 @@ vect_pattern_recog (loop_vec_info loop_vinfo)
   unsigned int nbbs = loop->num_nodes;
   gimple_stmt_iterator si;
   unsigned int i, j;
-  gimple (* vect_recog_func_ptr) (gimple, tree *, tree *);
+  gimple (* vect_recog_func_ptr) (gimple *, tree *, tree *);
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_pattern_recog ===");
 
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 174475)
+++ tree-vect-stmts.c	(working copy)
@@ -3232,6 +3232,33 @@ vectorizable_type_promotion (gimple stmt, gimple_s
       fprintf (vect_dump, "use not simple.");
       return false;
     }
+
+  op_type = TREE_CODE_LENGTH (code);
+  if (op_type == binary_op)
+    {
+      bool ok;
+
+      op1 = gimple_assign_rhs2 (stmt);
+      if (code == WIDEN_MULT_EXPR)
+        {
+          /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
+             OP1.  */
+          if (CONSTANT_CLASS_P (op0))
+            ok = vect_is_simple_use_1 (op1, loop_vinfo, NULL,
+                                       &def_stmt, &def, &dt[1], &vectype_in);
+          else
+            ok = vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def,
+                                     &dt[1]);
+
+          if (!ok)
+            {
+              if (vect_print_dump_info (REPORT_DETAILS))
+                fprintf (vect_dump, "use not simple.");
+              return false;
+            }
+        }
+    }
+
   /* If op0 is an external or constant def use a vector type with
      the same size as the output vector type.  */
   if (!vectype_in)
@@ -3264,18 +3291,6 @@ vectorizable_type_promotion (gimple stmt, gimple_s
 
   gcc_assert (ncopies >= 1);
 
-  op_type = TREE_CODE_LENGTH (code);
-  if (op_type == binary_op)
-    {
-      op1 = gimple_assign_rhs2 (stmt);
-      if (!vect_is_simple_use (op1, loop_vinfo, NULL, &def_stmt, &def, &dt[1]))
-        {
-          if (vect_print_dump_info (REPORT_DETAILS))
-            fprintf (vect_dump, "use not simple.");
-          return false;
-        }
-    }
-
   /* Supportable by target?  */
   if (!supportable_widening_operation (code, stmt, vectype_out, vectype_in,
                                        &decl1, &decl2, &code1, &code2,
@@ -3301,6 +3316,14 @@ vectorizable_type_promotion (gimple stmt, gimple_s
     fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
              ncopies);
 
+  if (code == WIDEN_MULT_EXPR)
+    {
+      if (CONSTANT_CLASS_P (op0))
+        op0 = fold_convert (TREE_TYPE (op1), op0);
+      else if (CONSTANT_CLASS_P (op1))
+        op1 = fold_convert (TREE_TYPE (op0), op1);
+    }
+
   /* Handle def.  */
   /* In case of multi-step promotion, we first generate promotion operations
      to the intermediate types, and then from that types to the final one.  */
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp	(revision 174475)
+++ testsuite/lib/target-supports.exp	(working copy)
@@ -2668,7 +2668,8 @@ proc check_effective_target_vect_widen_mult_qi_to_
 	} else {
 	    set et_vect_widen_mult_qi_to_hi_saved 0
 	}
-        if { [istarget powerpc*-*-*] } {
+        if { [istarget powerpc*-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
             set et_vect_widen_mult_qi_to_hi_saved 1
         }
     }
@@ -2701,7 +2702,8 @@ proc check_effective_target_vect_widen_mult_hi_to_
              || [istarget spu-*-*]
              || [istarget ia64-*-*]
              || [istarget i?86-*-*]
-             || [istarget x86_64-*-*] } {
+             || [istarget x86_64-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
             set et_vect_widen_mult_hi_to_si_saved 1
         }
     }
@@ -2710,6 +2712,52 @@ proc check_effective_target_vect_widen_mult_hi_to_
 }
 
 # Return 1 if the target plus current options supports a vector
+# widening multiplication of *char* args into *short* result, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_mult_qi_to_hi_pattern { } {
+    global et_vect_widen_mult_qi_to_hi_pattern
+
+    if [info exists et_vect_widen_mult_qi_to_hi_pattern_saved] {
+        verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: using cached result" 2
+    } else {
+        set et_vect_widen_mult_qi_to_hi_pattern_saved 0
+        if { [istarget powerpc*-*-*]
+              || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+            set et_vect_widen_mult_qi_to_hi_pattern_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_widen_mult_qi_to_hi_pattern: returning $et_vect_widen_mult_qi_to_hi_pattern_saved" 2
+    return $et_vect_widen_mult_qi_to_hi_pattern_saved
+}
+
+# Return 1 if the target plus current options supports a vector
+# widening multiplication of *short* args into *int* result, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_widen_mult_hi_to_si_pattern { } {
+    global et_vect_widen_mult_hi_to_si_pattern
+
+    if [info exists et_vect_widen_mult_hi_to_si_pattern_saved] {
+        verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: using cached result" 2
+    } else {
+        set et_vect_widen_mult_hi_to_si_pattern_saved 0
+        if { [istarget powerpc*-*-*]
+             || [istarget spu-*-*]
+             || [istarget ia64-*-*]
+             || [istarget i?86-*-*]
+             || [istarget x86_64-*-*]
+             || ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
+            set et_vect_widen_mult_hi_to_si_pattern_saved 1
+        }
+    }
+    verbose "check_effective_target_vect_widen_mult_hi_to_si_pattern: returning $et_vect_widen_mult_hi_to_si_pattern_saved" 2
+    return $et_vect_widen_mult_hi_to_si_pattern_saved
+}
+
+# Return 1 if the target plus current options supports a vector
 # dot-product of signed chars, 0 otherwise.
 #
 # This won't change for different subtargets so cache the result.
 
Index: testsuite/gcc.dg/vect/vect-widen-mult-u8.c
===================================================================
--- testsuite/gcc.dg/vect/vect-widen-mult-u8.c	(revision 174475)
+++ testsuite/gcc.dg/vect/vect-widen-mult-u8.c	(working copy)
@@ -9,7 +9,7 @@ unsigned char X[N] __attribute__ ((__aligned__(__B
 unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
 unsigned short result[N];
 
-/* char->short widening-mult */
+/* unsigned char->unsigned short widening-mult.  */
 __attribute__ ((noinline)) int
 foo1(int len) {
   int i;
 
@@ -28,8 +28,7 @@ int main (void)
   for (i=0; i<N; i++) {
     X[i] = i;
     Y[i] = 64-i;
-    if (i%4 == 0)
-      X[i] = 5;
+    __asm__ volatile ("");
   }
 
   foo1 (N);
@@ -43,5 +42,7 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_qi_to_hi_pattern } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
===================================================================
--- testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c	(revision 0)
+++ testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c	(revision 0)
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+
+__attribute__ ((noinline)) void
+foo (int *__restrict a,
+     short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+bar (int *__restrict a,
+     short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * (short) 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * (short) 2333)
+      abort ();
+}
+
+int main (void)
+{
+  int i;
+  int a[N];
+  short b[N];
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 0;
+      b[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (a, b, N);
+  bar (a, b, N);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
===================================================================
--- testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c	(revision 0)
+++ testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c	(revision 0)
@@ -0,0 +1,77 @@
+/* { dg-require-effective-target vect_int } */
+
+#include "tree-vect.h"
+#include <stdlib.h>
+
+#define N 32
+
+__attribute__ ((noinline)) void
+foo (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 2333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+bar (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = (unsigned short) 2333 * b[i];
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * (unsigned short) 2333)
+      abort ();
+}
+
+__attribute__ ((noinline)) void
+baz (unsigned int *__restrict a,
+     unsigned short *__restrict b,
+     int n)
+{
+  int i;
+
+  for (i = 0; i < n; i++)
+    a[i] = b[i] * 233333333;
+
+  for (i = 0; i < n; i++)
+    if (a[i] != b[i] * 233333333)
+      abort ();
+}
+
+
+int main (void)
+{
+  int i;
+  unsigned int a[N];
+  unsigned short b[N];
+
+  for (i = 0; i < N; i++)
+    {
+      a[i] = 0;
+      b[i] = i;
+      __asm__ volatile ("");
+    }
+
+  foo (a, b, N);
+  bar (a, b, N);
+  baz (a, b, N);
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" { target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 2 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
 
Index: testsuite/gcc.dg/vect/vect-widen-mult-u16.c
===================================================================
--- testsuite/gcc.dg/vect/vect-widen-mult-u16.c	(revision 174475)
+++ testsuite/gcc.dg/vect/vect-widen-mult-u16.c	(working copy)
@@ -9,13 +9,11 @@ unsigned short X[N] __attribute__ ((__aligned__(__
 unsigned short Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
 unsigned int result[N];
 
-/* short->int widening-mult */
+/* unsigned short->unsigned int widening-mult.  */
 __attribute__ ((noinline)) int
 foo1(int len) {
   int i;
 
-  /* Not vectorized because X[i] and Y[i] are casted to 'int'
-     so the widening multiplication pattern is not recognized.  */
   for (i=0; i<len; i++) {
     result[i] = (unsigned int)(X[i] * Y[i]);
   }
@@ -43,8 +41,8 @@ int main (void)
   return 0;
 }
 
-/*The induction loop is vectorized  */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
+/* { dg-final { scan-tree-dump-times "pattern recognized" 1 "vect" { target vect_widen_mult_hi_to_si_pattern } } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */