Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 14/07/11 15:25, Richard Guenther wrote: Ok. Committed, with no real changes. I just updated the testcase a little. Andrew 2011-08-19 Andrew Stubbs gcc/ * tree-ssa-math-opts.c (convert_mult_to_widen): Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. gcc/testsuite/ * gcc.target/arm/wmul-6.c: New file. --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target arm_dsp } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlalbb" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2068,12 +2068,13 @@ is_widening_mult_p (gimple stmt, static bool convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) { - tree lhs, rhs1, rhs2, type, type1, type2, tmp; + tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; enum insn_code handler; enum machine_mode to_mode, from_mode, actual_mode; optab op; int actual_precision; location_t loc = gimple_location (stmt); + bool from_unsigned1, from_unsigned2; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2085,10 +2086,12 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) to_mode = TYPE_MODE (type); from_mode = TYPE_MODE (type1); + from_unsigned1 = TYPE_UNSIGNED (type1); + from_unsigned2 = TYPE_UNSIGNED (type2); - if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) + if (from_unsigned1 && from_unsigned2) op = umul_widen_optab; - else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) + else if (!from_unsigned1 && !from_unsigned2) op = smul_widen_optab; else op = usmul_widen_optab; @@ -2097,22 +2100,45 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) 0, &actual_mode); if (handler == CODE_FOR_nothing) -return false; +{ + if (op != smul_widen_optab) + { + from_mode = GET_MODE_WIDER_MODE (from_mode); + if (GET_MODE_SIZE 
(to_mode) <= GET_MODE_SIZE (from_mode)) + return false; + + op = smul_widen_optab; + handler = find_widening_optab_handler_and_mode (op, to_mode, + from_mode, 0, + &actual_mode); + + if (handler == CODE_FOR_nothing) + return false; + + from_unsigned1 = from_unsigned2 = false; + } + else + return false; +} /* Ensure that the inputs to the handler are in the correct precison for the opcode. This will be the full mode size. */ actual_precision = GET_MODE_PRECISION (actual_mode); - if (actual_precision != TYPE_PRECISION (type1)) + if (actual_precision != TYPE_PRECISION (type1) + || from_unsigned1 != TYPE_UNSIGNED (type1)) { tmp = create_tmp_var (build_nonstandard_integer_type -(actual_precision, TYPE_UNSIGNED (type1)), +(actual_precision, from_unsigned1), NULL); rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); - +} + if (actual_precision != TYPE_PRECISION (type2) + || from_unsigned2 != TYPE_UNSIGNED (type2)) +{ /* Reuse the same type info, if possible. */ - if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) + if (!tmp || from_unsigned1 != from_unsigned2) tmp = create_tmp_var (build_nonstandard_integer_type -(actual_precision, TYPE_UNSIGNED (type2)), +(actual_precision, from_unsigned2), NULL); rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); } @@ -2137,7 +2163,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, { gimple rhs1_stmt = NULL, rhs2_stmt = NULL; gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; - tree type, type1, type2, tmp; + tree type, type1, type2, optype, tmp = NULL; tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; optab this_optab; @@ -2146,6 +2172,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, enum machine_mode to_mode, from_mode, actual_mode; location_t loc = gimple_location (stmt); int actual_precision; + bool from_unsigned1, from_unsigned2; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2239,9 +2266,21 @@ 
convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, to_mode = TYPE_MODE (type); from_mode = TYPE_MODE (type1); + from_unsigned1 = TYPE_UNSIGNED (type1); + from_unsigned2 = TYPE_UNSIGNED (type2); - if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) -return false; + /* There's no such thing as a mixed sign madd yet, so use a wider mode. */ + if (from_unsigned1 != from_unsigned2) +{ + enum machine_mode mode = GET_MODE_WIDER_MODE (from_mode); + if (GET_MODE_PRECISION (mode) < GET_MODE_PRECISION (to_mode)) + { + from_mode = mode; + from_unsigned1 = from_unsigned2 = false; + } + else + return false; +} /* If there was a conversio
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On Thu, Jul 14, 2011 at 4:23 PM, Andrew Stubbs wrote: > On 12/07/11 15:07, Andrew Stubbs wrote: >> >> This update does the same thing as before, but updated for the changes >> earlier in the patch series. In particular, the build_and_insert_cast >> function and find_widening_optab_handler_and_mode changes have been >> moved up to patch 2. > > And this update changes the way the casts are handled, partly because it got > unwieldy towards the end of the patch series, and partly because I found a > few bugs. > > I've also ensured that it checks the precision of the types, rather than the > mode size to ensure that it is bitfield safe. > > OK? Ok. Thanks, Richard. > Andrew >
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 12/07/11 15:07, Andrew Stubbs wrote: This update does the same thing as before, but updated for the changes earlier in the patch series. In particular, the build_and_insert_cast function and find_widening_optab_handler_and_mode changes have been moved up to patch 2. And this update changes the way the casts are handled, partly because it got unwieldy towards the end of the patch series, and partly because I found a few bugs. I've also ensured that it checks the precision of the types, rather than the mode size to ensure that it is bitfield safe. OK? Andrew 2011-07-14 Andrew Stubbs gcc/ * tree-ssa-math-opts.c (convert_mult_to_widen): Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. gcc/testsuite/ * gcc.target/arm/wmul-6.c: New file. --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2067,12 +2067,13 @@ is_widening_mult_p (gimple stmt, static bool convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) { - tree lhs, rhs1, rhs2, type, type1, type2, tmp; + tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL; enum insn_code handler; enum machine_mode to_mode, from_mode, actual_mode; optab op; int actual_precision; location_t loc = gimple_location (stmt); + bool from_unsigned1, from_unsigned2; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2084,10 +2085,12 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) to_mode = TYPE_MODE (type); from_mode = TYPE_MODE (type1); + from_unsigned1 = TYPE_UNSIGNED (type1); + from_unsigned2 = TYPE_UNSIGNED (type2); - if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2)) + if (from_unsigned1 && from_unsigned2) op = umul_widen_optab; - else if 
(!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2)) + else if (!from_unsigned1 && !from_unsigned2) op = smul_widen_optab; else op = usmul_widen_optab; @@ -2096,22 +2099,45 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) 0, &actual_mode); if (handler == CODE_FOR_nothing) -return false; +{ + if (op != smul_widen_optab) + { + from_mode = GET_MODE_WIDER_MODE (from_mode); + if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) + return false; + + op = smul_widen_optab; + handler = find_widening_optab_handler_and_mode (op, to_mode, + from_mode, 0, + &actual_mode); + + if (handler == CODE_FOR_nothing) + return false; + + from_unsigned1 = from_unsigned2 = false; + } + else + return false; +} /* Ensure that the inputs to the handler are in the correct precison for the opcode. This will be the full mode size. */ actual_precision = GET_MODE_PRECISION (actual_mode); - if (actual_precision != TYPE_PRECISION (type1)) + if (actual_precision != TYPE_PRECISION (type1) + || from_unsigned1 != TYPE_UNSIGNED (type1)) { tmp = create_tmp_var (build_nonstandard_integer_type -(actual_precision, TYPE_UNSIGNED (type1)), +(actual_precision, from_unsigned1), NULL); rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1); - +} + if (actual_precision != TYPE_PRECISION (type2) + || from_unsigned2 != TYPE_UNSIGNED (type2)) +{ /* Reuse the same type info, if possible. 
*/ - if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) + if (!tmp || from_unsigned1 != from_unsigned2) tmp = create_tmp_var (build_nonstandard_integer_type -(actual_precision, TYPE_UNSIGNED (type2)), +(actual_precision, from_unsigned2), NULL); rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2); } @@ -2136,7 +2162,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, { gimple rhs1_stmt = NULL, rhs2_stmt = NULL; gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt; - tree type, type1, type2, tmp; + tree type, type1, type2, optype, tmp = NULL; tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs; enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK; optab this_optab; @@ -2145,6 +2171,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, enum machine_mode to_mode, from_mode, actual_mode; location_t loc = gimple_location (stmt); int actual_precision; + bool from_unsigned1, from_unsigned2; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2238,9 +2265,21 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, to_mode = TYPE_MODE (type); from_mode = TYPE_MODE (type1); + from_unsigned1 = TYPE_UNSIGNED (type1); + from_unsigned2 = TYPE_UNSIGNED (type2); - if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) -ret
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 04/07/11 15:26, Andrew Stubbs wrote: On 28/06/11 15:14, Andrew Stubbs wrote: On 28/06/11 13:33, Andrew Stubbs wrote: On 23/06/11 15:41, Andrew Stubbs wrote: If one or both of the inputs to a widening multiply are of unsigned type then the compiler will attempt to use usmul_widen_optab or umul_widen_optab, respectively. That works fine, but only if the target supports those operations directly. Otherwise, it just bombs out and reverts to the normal inefficient non-widening multiply. This patch attempts to catch these cases and use an alternative signed widening multiply instruction, if one of those is available. I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). OK? This update fixes the testsuite issue Janis pointed out. And this one fixes up the wmul-5.c testcase also. The patch has changed the correct result. Here's an update for the context changed by the update to patch 3. The content of the patch has not changed. This update does the same thing as before, but updated for the changes earlier in the patch series. In particular, the build_and_insert_cast function and find_widening_optab_handler_and_mode changes have been moved up to patch 2. OK? Andrew 2011-07-12 Andrew Stubbs gcc/ * tree-ssa-math-opts.c (convert_mult_to_widen): Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. gcc/testsuite/ * gcc.target/arm/wmul-6.c: New file. 
--- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -2071,6 +2071,7 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) enum insn_code handler; enum machine_mode to_mode, from_mode; optab op; + bool do_cast = false; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2094,9 +2095,32 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) 0, &from_mode); if (handler == CODE_FOR_nothing) -return false; +{ + if (op != smul_widen_optab) + { + from_mode = GET_MODE_WIDER_MODE (from_mode); + if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode)) + return false; + + op = smul_widen_optab; + handler = find_widening_optab_handler_and_mode (op, to_mode, + from_mode, 0, + &from_mode); - if (from_mode != TYPE_MODE (type1)) + if (handler == CODE_FOR_nothing) + return false; + + type1 = build_nonstandard_integer_type ( + GET_MODE_PRECISION (from_mode), + 0); + type2 = type1; + do_cast = true; + } + else + return false; +} + + if (from_mode != TYPE_MODE (type1) || do_cast) { location_t loc = gimple_location (stmt); tree tmp1, tmp2; @@ -2143,6 +2167,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, enum tree_code wmult_code; enum insn_code handler; enum machine_mode from_mode; + bool do_cast = false; lhs = gimple_assign_lhs (stmt); type = TREE_TYPE (lhs); @@ -2234,8 +2259,21 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, else return false; + /* We don't support usmadd yet, so try a wider signed mode. 
*/ if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2)) -return false; +{ + enum machine_mode mode = TYPE_MODE (type1); + mode = GET_MODE_WIDER_MODE (mode); + if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (TYPE_MODE (type))) + { + type1 = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), + 0); + type2 = type1; + do_cast = true; + } + else + return false; +} /* If there was a conversion between the multiply and addition then we need to make sure it fits a multiply-and-accumulate. @@ -2276,7 +2314,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt, if (handler == CODE_FOR_nothing) return false; - if (TYPE_MODE (type1) != from_mode) + if (TYPE_MODE (type1) != from_mode || do_cast) { location_t loc = gimple_location (stmt); tree tmp;
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On Thu, Jul 7, 2011 at 12:41 PM, Andrew Stubbs wrote: > On 07/07/11 11:04, Richard Guenther wrote: >> >> Both types are equal, so please share the temporary variable you >> create >> >> + rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), >> + create_tmp_var (type1, NULL), >> rhs1, type1); >> + rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), >> + create_tmp_var (type2, NULL), >> rhs2, type2); >> >> here (CSE create_tmp_var). > > I'm sorry, I don't understand this? > > This takes code like this: > > r1 = a; > r2 = b; > result = r1 + r2; > > And transforms it to this: > > r1 = a; > r2 = b; > t1 = (type1) r1; > t2 = (type2) r2; > result = t1 + t2; > > Yes, type1 == type2, but r1 != r2, so t1 != t2. > > I don't see where the common expression is here? But then, I am something of > a newbie to tree optimizations. create_tmp_var creates a var-decl, build_and_insert_cast builds an SSA name from it. You can build multiple SSA names from a single VAR_DECL, so no need to waste two VAR_DECLs for temporaries of the same type. Richard. > Andrew >
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 07/07/11 11:04, Richard Guenther wrote: Both types are equal, so please share the temporary variable you create + rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type1, NULL), rhs1, type1); + rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type2, NULL), rhs2, type2); here (CSE create_tmp_var). I'm sorry, I don't understand this? This takes code like this: r1 = a; r2 = b; result = r1 + r2; And transforms it to this: r1 = a; r2 = b; t1 = (type1) r1; t2 = (type2) r2; result = t1 + t2; Yes, type1 == type2, but r1 != r2, so t1 != t2. I don't see where the common expression is here? But then, I am something of a newbie to tree optimizations. Andrew
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On Mon, Jul 4, 2011 at 4:26 PM, Andrew Stubbs wrote: > On 28/06/11 15:14, Andrew Stubbs wrote: >> >> On 28/06/11 13:33, Andrew Stubbs wrote: >>> >>> On 23/06/11 15:41, Andrew Stubbs wrote: If one or both of the inputs to a widening multiply are of unsigned type then the compiler will attempt to use usmul_widen_optab or umul_widen_optab, respectively. That works fine, but only if the target supports those operations directly. Otherwise, it just bombs out and reverts to the normal inefficient non-widening multiply. This patch attempts to catch these cases and use an alternative signed widening multiply instruction, if one of those is available. I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). OK? >>> >>> This update fixes the testsuite issue Janis pointed out. >> >> And this one fixes up the wmul-5.c testcase also. The patch has changed >> the correct result. > > Here's an update for the context changed by the update to patch 3. > > The content of the patch has not changed. + gimple stmt = gimple_build_assign (result, fold_convert (type, val)); please use gimple_build_assign_with_ops -convert_mult_to_widen (gimple stmt) +convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi) The comment needs updating for the new parameter. + type1 = type2 = lang_hooks.types.type_for_mode (from_mode, 0); don't use type_for_mode, use build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode), 0) instead. Both types are equal, so please share the temporary variable you create + rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type1, NULL), rhs1, type1); + rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), + create_tmp_var (type2, NULL), rhs2, type2); here (CSE create_tmp_var). 
+ type1 = type2 = lang_hooks.types.type_for_mode (mode, 0); + mult_rhs1 = build_and_insert_cast (gsi, gimple_location (stmt), +create_tmp_var (type1, NULL), +mult_rhs1, type1); + mult_rhs2 = build_and_insert_cast (gsi, gimple_location (stmt), +create_tmp_var (type2, NULL), +mult_rhs2, type2); Likewise. Thanks, Richard. > Andrew >
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 28/06/11 15:14, Andrew Stubbs wrote: On 28/06/11 13:33, Andrew Stubbs wrote: On 23/06/11 15:41, Andrew Stubbs wrote: If one or both of the inputs to a widening multiply are of unsigned type then the compiler will attempt to use usmul_widen_optab or umul_widen_optab, respectively. That works fine, but only if the target supports those operations directly. Otherwise, it just bombs out and reverts to the normal inefficient non-widening multiply. This patch attempts to catch these cases and use an alternative signed widening multiply instruction, if one of those is available. I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). OK? This update fixes the testsuite issue Janis pointed out. And this one fixes up the wmul-5.c testcase also. The patch has changed the correct result. Here's an update for the context changed by the update to patch 3. The content of the patch has not changed. Andrew 2011-07-04 Andrew Stubbs gcc/ * Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency. * optabs.c (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this, and add new argument 'found_mode'. * optabs.h (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this. (find_widening_optab_handler): New macro. * tree-ssa-math-opts.c: Include langhooks.h (build_and_insert_cast): New function. (convert_mult_to_widen): Add new argument 'gsi'. Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. (execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen. gcc/testsuite/ * gcc.target/arm/wmul-5.c: Update expected result. * gcc.target/arm/wmul-6.c: New file. 
--- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \ $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \ - $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h + $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \ + langhooks.h tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \ $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1) non-widening optabs also. */ enum insn_code -find_widening_optab_handler (optab op, enum machine_mode to_mode, - enum machine_mode from_mode, - int permit_non_widening) +find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, + enum machine_mode from_mode, + int permit_non_widening, + enum machine_mode *found_mode) { for (; (permit_non_widening || from_mode != to_mode) && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) @@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode, from_mode); if (handler != CODE_FOR_nothing) - return handler; + { + if (found_mode) + *found_mode = from_mode; + return handler; + } } return CODE_FOR_nothing; --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); /* Find a widening optab even if it doesn't widen as much as we want. 
*/ -extern enum insn_code find_widening_optab_handler (optab, enum machine_mode, - enum machine_mode, int); +#define find_widening_optab_handler(A,B,C,D) \ + find_widening_optab_handler_and_mode (A, B, C, D, NULL) +extern enum insn_code find_widening_optab_handler_and_mode (optab, + enum machine_mode, + enum machine_mode, + int, + enum machine_mode *); /* An extra flag to control optab_for_tree_code's behavior. This is needed to distinguish between machines with a vector shift that takes a scalar for the --- a/gcc/testsuite/gcc.target/arm/wmul-5.c +++ b/gcc/testsuite/gcc.target/arm/wmul-5.c @@ -7,4 +7,4 @@ foo (long long a, char *b, char *c) return a + *b * *c; } -/* { dg-final { scan-assembler "umlal" } } */ +/* { dg-final { scan-assembler "smlalbb" } } */ --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-mat
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 28/06/11 13:33, Andrew Stubbs wrote: On 23/06/11 15:41, Andrew Stubbs wrote: If one or both of the inputs to a widening multiply are of unsigned type then the compiler will attempt to use usmul_widen_optab or umul_widen_optab, respectively. That works fine, but only if the target supports those operations directly. Otherwise, it just bombs out and reverts to the normal inefficient non-widening multiply. This patch attempts to catch these cases and use an alternative signed widening multiply instruction, if one of those is available. I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). OK? This update fixes the testsuite issue Janis pointed out. And this one fixes up the wmul-5.c testcase also. The patch has changed the correct result. Andrew 2011-06-28 Andrew Stubbs gcc/ * Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency. * optabs.c (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this, and add new argument 'found_mode'. * optabs.h (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this. (find_widening_optab_handler): New macro. * tree-ssa-math-opts.c: Include langhooks.h (build_and_insert_cast): New function. (convert_mult_to_widen): Add new argument 'gsi'. Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. (execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen. gcc/testsuite/ * gcc.target/arm/wmul-5.c: Update expected result. * gcc.target/arm/wmul-6.c: New file. 
--- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \ $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \ - $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h + $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \ + langhooks.h tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \ $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1) non-widening optabs also. */ enum insn_code -find_widening_optab_handler (optab op, enum machine_mode to_mode, - enum machine_mode from_mode, - int permit_non_widening) +find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, + enum machine_mode from_mode, + int permit_non_widening, + enum machine_mode *found_mode) { for (; (permit_non_widening || from_mode != to_mode) && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) @@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode, from_mode); if (handler != CODE_FOR_nothing) - return handler; + { + if (found_mode) + *found_mode = from_mode; + return handler; + } } return CODE_FOR_nothing; --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); /* Find a widening optab even if it doesn't widen as much as we want. 
*/ -extern enum insn_code find_widening_optab_handler (optab, enum machine_mode, - enum machine_mode, int); +#define find_widening_optab_handler(A,B,C,D) \ + find_widening_optab_handler_and_mode (A, B, C, D, NULL) +extern enum insn_code find_widening_optab_handler_and_mode (optab, + enum machine_mode, + enum machine_mode, + int, + enum machine_mode *); /* An extra flag to control optab_for_tree_code's behavior. This is needed to distinguish between machines with a vector shift that takes a scalar for the --- a/gcc/testsuite/gcc.target/arm/wmul-5.c +++ b/gcc/testsuite/gcc.target/arm/wmul-5.c @@ -7,4 +7,4 @@ foo (long long a, char *b, char *c) return a + *b * *c; } -/* { dg-final { scan-assembler "umlal" } } */ +/* { dg-final { scan-assembler "smlalbb" } } */ --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "targe
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 06/23/2011 04:41 PM, Andrew Stubbs wrote: I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). Yes, that's correct. Paolo
Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies
On 23/06/11 15:41, Andrew Stubbs wrote: If one or both of the inputs to a widening multiply are of unsigned type then the compiler will attempt to use usmul_widen_optab or umul_widen_optab, respectively. That works fine, but only if the target supports those operations directly. Otherwise, it just bombs out and reverts to the normal inefficient non-widening multiply. This patch attempts to catch these cases and use an alternative signed widening multiply instruction, if one of those is available. I believe this should be legal as long as the top bit of both inputs is guaranteed to be zero. The code achieves this guarantee by zero-extending the inputs to a wider mode (which must still be narrower than the output mode). OK? This update fixes the testsuite issue Janis pointed out. Andrew 2011-06-28 Andrew Stubbs gcc/ * Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency. * optabs.c (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this, and add new argument 'found_mode'. * optabs.h (find_widening_optab_handler): Rename to ... (find_widening_optab_handler_and_mode): ... this. (find_widening_optab_handler): New macro. * tree-ssa-math-opts.c: Include langhooks.h (build_and_insert_cast): New function. (convert_mult_to_widen): Add new argument 'gsi'. Convert unsupported unsigned multiplies to signed. (convert_plusminus_to_widen): Likewise. (execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen. gcc/testsuite/ * gcc.target/arm/wmul-6.c: New file. 
--- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \ $(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \ $(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \ - $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h + $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \ + langhooks.h tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \ $(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1) non-widening optabs also. */ enum insn_code -find_widening_optab_handler (optab op, enum machine_mode to_mode, - enum machine_mode from_mode, - int permit_non_widening) +find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode, + enum machine_mode from_mode, + int permit_non_widening, + enum machine_mode *found_mode) { for (; (permit_non_widening || from_mode != to_mode) && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode) @@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode, from_mode); if (handler != CODE_FOR_nothing) - return handler; + { + if (found_mode) + *found_mode = from_mode; + return handler; + } } return CODE_FOR_nothing; --- a/gcc/optabs.h +++ b/gcc/optabs.h @@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code); /* Find a widening optab even if it doesn't widen as much as we want. 
*/ -extern enum insn_code find_widening_optab_handler (optab, enum machine_mode, - enum machine_mode, int); +#define find_widening_optab_handler(A,B,C,D) \ + find_widening_optab_handler_and_mode (A, B, C, D, NULL) +extern enum insn_code find_widening_optab_handler_and_mode (optab, + enum machine_mode, + enum machine_mode, + int, + enum machine_mode *); /* An extra flag to control optab_for_tree_code's behavior. This is needed to distinguish between machines with a vector shift that takes a scalar for the --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/wmul-6.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=armv7-a" } */ + +long long +foo (long long a, unsigned char *b, signed char *c) +{ + return a + (long long)*b * (long long)*c; +} + +/* { dg-final { scan-assembler "smlal" } } */ --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -98,6 +98,7 @@ along with GCC; see the file COPYING3. If not see #include "basic-block.h" #include "target.h" #include "gimple-pretty-print.h" +#include "langhooks.h" /* FIXME: RTL headers have to be included here for optabs. */ #include "rtl.h" /* Because optabs.h wants enum rtx_code. */ @@ -1086,6 +1087,21 @@ build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, return result; } +/* Build a gimple assignment to cast VAL to TYPE, and put the result in + TARGET. Insert the statement prior to GSI's current position