Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-08-19 Thread Andrew Stubbs

On 14/07/11 15:25, Richard Guenther wrote:

Ok.


Committed, with no real changes. I just updated the testcase a little.

Andrew
2011-08-19  Andrew Stubbs  

	gcc/
	* tree-ssa-math-opts.c (convert_mult_to_widen): Convert
	unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.

	gcc/testsuite/
	* gcc.target/arm/wmul-6.c: New file.

--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm_dsp } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlalbb" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2068,12 +2068,13 @@ is_widening_mult_p (gimple stmt,
 static bool
 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
-  tree lhs, rhs1, rhs2, type, type1, type2, tmp;
+  tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
   enum insn_code handler;
   enum machine_mode to_mode, from_mode, actual_mode;
   optab op;
   int actual_precision;
   location_t loc = gimple_location (stmt);
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2085,10 +2086,12 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 
   to_mode = TYPE_MODE (type);
   from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
 
-  if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
+  if (from_unsigned1 && from_unsigned2)
 op = umul_widen_optab;
-  else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
+  else if (!from_unsigned1 && !from_unsigned2)
 op = smul_widen_optab;
   else
 op = usmul_widen_optab;
@@ -2097,22 +2100,45 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 		  0, &actual_mode);
 
   if (handler == CODE_FOR_nothing)
-return false;
+{
+  if (op != smul_widen_optab)
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+	return false;
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+			  from_mode, 0,
+			  &actual_mode);
+
+	  if (handler == CODE_FOR_nothing)
+	return false;
+
+	  from_unsigned1 = from_unsigned2 = false;
+	}
+  else
+	return false;
+}
 
   /* Ensure that the inputs to the handler are in the correct precison
  for the opcode.  This will be the full mode size.  */
   actual_precision = GET_MODE_PRECISION (actual_mode);
-  if (actual_precision != TYPE_PRECISION (type1))
+  if (actual_precision != TYPE_PRECISION (type1)
+  || from_unsigned1 != TYPE_UNSIGNED (type1))
 {
   tmp = create_tmp_var (build_nonstandard_integer_type
-(actual_precision, TYPE_UNSIGNED (type1)),
+(actual_precision, from_unsigned1),
 			NULL);
   rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
-
+}
+  if (actual_precision != TYPE_PRECISION (type2)
+  || from_unsigned2 != TYPE_UNSIGNED (type2))
+{
   /* Reuse the same type info, if possible.  */
-  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+  if (!tmp || from_unsigned1 != from_unsigned2)
 	tmp = create_tmp_var (build_nonstandard_integer_type
-(actual_precision, TYPE_UNSIGNED (type2)),
+(actual_precision, from_unsigned2),
 			  NULL);
   rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
 }
@@ -2137,7 +2163,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
-  tree type, type1, type2, tmp;
+  tree type, type1, type2, optype, tmp = NULL;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
@@ -2146,6 +2172,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   enum machine_mode to_mode, from_mode, actual_mode;
   location_t loc = gimple_location (stmt);
   int actual_precision;
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2239,9 +2266,21 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 
   to_mode = TYPE_MODE (type);
   from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
 
-  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-return false;
+  /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
+  if (from_unsigned1 != from_unsigned2)
+{
+  enum machine_mode mode = GET_MODE_WIDER_MODE (from_mode);
+  if (GET_MODE_PRECISION (mode) < GET_MODE_PRECISION (to_mode))
+	{
+	  from_mode = mode;
+	  from_unsigned1 = from_unsigned2 = false;
+	}
+  else
+	return false;
+}
 
   /* If there was a conversio

Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-14 Thread Richard Guenther
On Thu, Jul 14, 2011 at 4:23 PM, Andrew Stubbs  wrote:
> On 12/07/11 15:07, Andrew Stubbs wrote:
>>
>> This update does the same thing as before, but updated for the changes
>> earlier in the patch series. In particular, the build_and_insert_cast
>> function and find_widening_optab_handler_and_mode changes have been
>> moved up to patch 2.
>
> And this update changes the way the casts are handled, partly because it got
> unwieldy towards the end of the patch series, and partly because I found a
> few bugs.
>
> I've also ensured that it checks the precision of the types, rather than the
> mode size to ensure that it is bitfield safe.
>
> OK?

Ok.

Thanks,
Richard.

> Andrew
>


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-14 Thread Andrew Stubbs

On 12/07/11 15:07, Andrew Stubbs wrote:

This update does the same thing as before, but updated for the changes
earlier in the patch series. In particular, the build_and_insert_cast
function and find_widening_optab_handler_and_mode changes have been
moved up to patch 2.


And this update changes the way the casts are handled, partly because it 
got unwieldy towards the end of the patch series, and partly because I 
found a few bugs.


I've also ensured that it checks the precision of the types, rather than 
the mode size to ensure that it is bitfield safe.


OK?

Andrew
2011-07-14  Andrew Stubbs  

	gcc/
	* tree-ssa-math-opts.c (convert_mult_to_widen): Convert
	unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.

	gcc/testsuite/
	* gcc.target/arm/wmul-6.c: New file.

--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2067,12 +2067,13 @@ is_widening_mult_p (gimple stmt,
 static bool
 convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 {
-  tree lhs, rhs1, rhs2, type, type1, type2, tmp;
+  tree lhs, rhs1, rhs2, type, type1, type2, tmp = NULL;
   enum insn_code handler;
   enum machine_mode to_mode, from_mode, actual_mode;
   optab op;
   int actual_precision;
   location_t loc = gimple_location (stmt);
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2084,10 +2085,12 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 
   to_mode = TYPE_MODE (type);
   from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
 
-  if (TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2))
+  if (from_unsigned1 && from_unsigned2)
 op = umul_widen_optab;
-  else if (!TYPE_UNSIGNED (type1) && !TYPE_UNSIGNED (type2))
+  else if (!from_unsigned1 && !from_unsigned2)
 op = smul_widen_optab;
   else
 op = usmul_widen_optab;
@@ -2096,22 +2099,45 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 		  0, &actual_mode);
 
   if (handler == CODE_FOR_nothing)
-return false;
+{
+  if (op != smul_widen_optab)
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+	return false;
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+			  from_mode, 0,
+			  &actual_mode);
+
+	  if (handler == CODE_FOR_nothing)
+	return false;
+
+	  from_unsigned1 = from_unsigned2 = false;
+	}
+  else
+	return false;
+}
 
   /* Ensure that the inputs to the handler are in the correct precison
  for the opcode.  This will be the full mode size.  */
   actual_precision = GET_MODE_PRECISION (actual_mode);
-  if (actual_precision != TYPE_PRECISION (type1))
+  if (actual_precision != TYPE_PRECISION (type1)
+  || from_unsigned1 != TYPE_UNSIGNED (type1))
 {
   tmp = create_tmp_var (build_nonstandard_integer_type
-(actual_precision, TYPE_UNSIGNED (type1)),
+(actual_precision, from_unsigned1),
 			NULL);
   rhs1 = build_and_insert_cast (gsi, loc, tmp, rhs1);
-
+}
+  if (actual_precision != TYPE_PRECISION (type2)
+  || from_unsigned2 != TYPE_UNSIGNED (type2))
+{
   /* Reuse the same type info, if possible.  */
-  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
+  if (!tmp || from_unsigned1 != from_unsigned2)
 	tmp = create_tmp_var (build_nonstandard_integer_type
-(actual_precision, TYPE_UNSIGNED (type2)),
+(actual_precision, from_unsigned2),
 			  NULL);
   rhs2 = build_and_insert_cast (gsi, loc, tmp, rhs2);
 }
@@ -2136,7 +2162,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 {
   gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
   gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
-  tree type, type1, type2, tmp;
+  tree type, type1, type2, optype, tmp = NULL;
   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
   optab this_optab;
@@ -2145,6 +2171,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   enum machine_mode to_mode, from_mode, actual_mode;
   location_t loc = gimple_location (stmt);
   int actual_precision;
+  bool from_unsigned1, from_unsigned2;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2238,9 +2265,21 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
 
   to_mode = TYPE_MODE (type);
   from_mode = TYPE_MODE (type1);
+  from_unsigned1 = TYPE_UNSIGNED (type1);
+  from_unsigned2 = TYPE_UNSIGNED (type2);
 
-  if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-ret

Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-12 Thread Andrew Stubbs

On 04/07/11 15:26, Andrew Stubbs wrote:

On 28/06/11 15:14, Andrew Stubbs wrote:

On 28/06/11 13:33, Andrew Stubbs wrote:

On 23/06/11 15:41, Andrew Stubbs wrote:

If one or both of the inputs to a widening multiply are of unsigned type
then the compiler will attempt to use usmul_widen_optab or
umul_widen_optab, respectively.

That works fine, but only if the target supports those operations
directly. Otherwise, it just bombs out and reverts to the normal
inefficient non-widening multiply.

This patch attempts to catch these cases and use an alternative signed
widening multiply instruction, if one of those is available.

I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).

OK?


This update fixes the testsuite issue Janis pointed out.


And this one fixes up the wmul-5.c testcase also. The patch has changed
the correct result.


Here's an update for the context changed by the update to patch 3.

The content of the patch has not changed.


This update does the same thing as before, but updated for the changes 
earlier in the patch series. In particular, the build_and_insert_cast 
function and find_widening_optab_handler_and_mode changes have been 
moved up to patch 2.


OK?

Andrew
2011-07-12  Andrew Stubbs  

	gcc/
	* tree-ssa-math-opts.c (convert_mult_to_widen): Convert
	unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.

	gcc/testsuite/
	* gcc.target/arm/wmul-6.c: New file.

--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2071,6 +2071,7 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
   enum insn_code handler;
   enum machine_mode to_mode, from_mode;
   optab op;
+  bool do_cast = false;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2094,9 +2095,32 @@ convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)
 		  0, &from_mode);
 
   if (handler == CODE_FOR_nothing)
-return false;
+{
+  if (op != smul_widen_optab)
+	{
+	  from_mode = GET_MODE_WIDER_MODE (from_mode);
+	  if (GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
+	return false;
+
+	  op = smul_widen_optab;
+	  handler = find_widening_optab_handler_and_mode (op, to_mode,
+			  from_mode, 0,
+			  &from_mode);
 
-  if (from_mode != TYPE_MODE (type1))
+	  if (handler == CODE_FOR_nothing)
+	return false;
+
+	  type1 = build_nonstandard_integer_type (
+	GET_MODE_PRECISION (from_mode),
+	0);
+	  type2 = type1;
+	  do_cast = true;
+	}
+  else
+	return false;
+}
+
+  if (from_mode != TYPE_MODE (type1) || do_cast)
 {
   location_t loc = gimple_location (stmt);
   tree tmp1, tmp2;
@@ -2143,6 +2167,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   enum tree_code wmult_code;
   enum insn_code handler;
   enum machine_mode from_mode;
+  bool do_cast = false;
 
   lhs = gimple_assign_lhs (stmt);
   type = TREE_TYPE (lhs);
@@ -2234,8 +2259,21 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   else
 return false;
 
+  /* We don't support usmadd yet, so try a wider signed mode.  */
   if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
-return false;
+{
+  enum machine_mode mode = TYPE_MODE (type1);
+  mode = GET_MODE_WIDER_MODE (mode);
+  if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (TYPE_MODE (type)))
+	{
+	  type1 = build_nonstandard_integer_type (GET_MODE_PRECISION (mode),
+		  0);
+	  type2 = type1;
+	  do_cast = true;
+	}
+  else
+	return false;
+}
 
   /* If there was a conversion between the multiply and addition
  then we need to make sure it fits a multiply-and-accumulate.
@@ -2276,7 +2314,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
   if (handler == CODE_FOR_nothing)
 return false;
 
-  if (TYPE_MODE (type1) != from_mode)
+  if (TYPE_MODE (type1) != from_mode || do_cast)
 {
   location_t loc = gimple_location (stmt);
   tree tmp;


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-07 Thread Richard Guenther
On Thu, Jul 7, 2011 at 12:41 PM, Andrew Stubbs  wrote:
> On 07/07/11 11:04, Richard Guenther wrote:
>>
>> Both types are equal, so please share the temporary variable you
>> create
>>
>> +         rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
>> +                                       create_tmp_var (type1, NULL),
>> rhs1, type1);
>> +         rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
>> +                                       create_tmp_var (type2, NULL),
>> rhs2, type2);
>>
>> here (CSE create_tmp_var).
>
> I'm sorry, I don't understand this?
>
> This takes code like this:
>
>  r1 = a;
>  r2 = b;
>  result = r1 + r2;
>
> And transforms it to this:
>
>  r1 = a;
>  r2 = b;
>  t1 = (type1) r1;
>  t2 = (type2) r2;
>  result = t1 + t2;
>
> Yes, type1 == type2, but r1 != r2, so t1 != t2.
>
> I don't see where the common expression is here? But then, I am something of
> a newbie to tree optimizations.

create_tmp_var creates a VAR_DECL, and build_and_insert_cast builds an
SSA name from it.  You can build multiple SSA names from a single
VAR_DECL, so there is no need to waste two VAR_DECLs on temporaries
of the same type.

Richard.

> Andrew
>


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-07 Thread Andrew Stubbs

On 07/07/11 11:04, Richard Guenther wrote:

Both types are equal, so please share the temporary variable you
create

+ rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+   create_tmp_var (type1, NULL),
rhs1, type1);
+ rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+   create_tmp_var (type2, NULL),
rhs2, type2);

here (CSE create_tmp_var).


I'm sorry, I don't understand this?

This takes code like this:

  r1 = a;
  r2 = b;
  result = r1 + r2;

And transforms it to this:

  r1 = a;
  r2 = b;
  t1 = (type1) r1;
  t2 = (type2) r2;
  result = t1 + t2;

Yes, type1 == type2, but r1 != r2, so t1 != t2.

I don't see where the common expression is here? But then, I am 
something of a newbie to tree optimizations.


Andrew


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-07 Thread Richard Guenther
On Mon, Jul 4, 2011 at 4:26 PM, Andrew Stubbs  wrote:
> On 28/06/11 15:14, Andrew Stubbs wrote:
>>
>> On 28/06/11 13:33, Andrew Stubbs wrote:
>>>
>>> On 23/06/11 15:41, Andrew Stubbs wrote:

>>>> If one or both of the inputs to a widening multiply are of unsigned type
>>>> then the compiler will attempt to use usmul_widen_optab or
>>>> umul_widen_optab, respectively.
>>>>
>>>> That works fine, but only if the target supports those operations
>>>> directly. Otherwise, it just bombs out and reverts to the normal
>>>> inefficient non-widening multiply.
>>>>
>>>> This patch attempts to catch these cases and use an alternative signed
>>>> widening multiply instruction, if one of those is available.
>>>>
>>>> I believe this should be legal as long as the top bit of both inputs is
>>>> guaranteed to be zero. The code achieves this guarantee by
>>>> zero-extending the inputs to a wider mode (which must still be narrower
>>>> than the output mode).
>>>>
>>>> OK?
>>>
>>> This update fixes the testsuite issue Janis pointed out.
>>
>> And this one fixes up the wmul-5.c testcase also. The patch has changed
>> the correct result.
>
> Here's an update for the context changed by the update to patch 3.
>
> The content of the patch has not changed.

+  gimple stmt = gimple_build_assign (result, fold_convert (type, val));

please use gimple_build_assign_with_ops

-convert_mult_to_widen (gimple stmt)
+convert_mult_to_widen (gimple stmt, gimple_stmt_iterator *gsi)

The comment needs updating for the new parameter.

+ type1 = type2 = lang_hooks.types.type_for_mode (from_mode, 0);

don't use type_for_mode, use build_nonstandard_integer_type
(GET_MODE_PRECISION (from_mode), 0) instead.

Both types are equal, so please share the temporary variable you
create

+ rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+   create_tmp_var (type1, NULL),
rhs1, type1);
+ rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+   create_tmp_var (type2, NULL),
rhs2, type2);

here (CSE create_tmp_var).

+ type1 = type2 = lang_hooks.types.type_for_mode (mode, 0);
+ mult_rhs1 = build_and_insert_cast (gsi, gimple_location (stmt),
+create_tmp_var (type1, NULL),
+mult_rhs1, type1);
+ mult_rhs2 = build_and_insert_cast (gsi, gimple_location (stmt),
+create_tmp_var (type2, NULL),
+mult_rhs2, type2);

Likewise.

Thanks,
Richard.

> Andrew
>


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-07-04 Thread Andrew Stubbs

On 28/06/11 15:14, Andrew Stubbs wrote:

On 28/06/11 13:33, Andrew Stubbs wrote:

On 23/06/11 15:41, Andrew Stubbs wrote:

If one or both of the inputs to a widening multiply are of unsigned type
then the compiler will attempt to use usmul_widen_optab or
umul_widen_optab, respectively.

That works fine, but only if the target supports those operations
directly. Otherwise, it just bombs out and reverts to the normal
inefficient non-widening multiply.

This patch attempts to catch these cases and use an alternative signed
widening multiply instruction, if one of those is available.

I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).

OK?


This update fixes the testsuite issue Janis pointed out.


And this one fixes up the wmul-5.c testcase also. The patch has changed
the correct result.


Here's an update for the context changed by the update to patch 3.

The content of the patch has not changed.

Andrew
2011-07-04  Andrew Stubbs  

	gcc/
	* Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency.
	* optabs.c (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this, and add new
	argument 'found_mode'.
	* optabs.h (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this.
	(find_widening_optab_handler): New macro.
	* tree-ssa-math-opts.c: Include langhooks.h
	(build_and_insert_cast): New function.
	(convert_mult_to_widen): Add new argument 'gsi'.
	Convert unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.
	(execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen.

	gcc/testsuite/
	* gcc.target/arm/wmul-5.c: Update expected result.
	* gcc.target/arm/wmul-6.c: New file.

--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
$(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
-   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h
+   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \
+   langhooks.h
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \
$(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
non-widening optabs also.  */
 
 enum insn_code
-find_widening_optab_handler (optab op, enum machine_mode to_mode,
-			 enum machine_mode from_mode,
-			 int permit_non_widening)
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+  enum machine_mode from_mode,
+  int permit_non_widening,
+  enum machine_mode *found_mode)
 {
   for (; (permit_non_widening || from_mode != to_mode)
 	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
@@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode,
 		   from_mode);
 
   if (handler != CODE_FOR_nothing)
-	return handler;
+	{
+	  if (found_mode)
+	*found_mode = from_mode;
+	  return handler;
+	}
 }
 
   return CODE_FOR_nothing;
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-extern enum insn_code find_widening_optab_handler (optab, enum machine_mode,
-		   enum machine_mode, int);
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+			enum machine_mode,
+			enum machine_mode,
+			int,
+			enum machine_mode *);
 
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
distinguish between machines with a vector shift that takes a scalar for the
--- a/gcc/testsuite/gcc.target/arm/wmul-5.c
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -7,4 +7,4 @@ foo (long long a, char *b, char *c)
   return a + *b * *c;
 }
 
-/* { dg-final { scan-assembler "umlal" } } */
+/* { dg-final { scan-assembler "smlalbb" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-mat

Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-06-28 Thread Andrew Stubbs

On 28/06/11 13:33, Andrew Stubbs wrote:

On 23/06/11 15:41, Andrew Stubbs wrote:

If one or both of the inputs to a widening multiply are of unsigned type
then the compiler will attempt to use usmul_widen_optab or
umul_widen_optab, respectively.

That works fine, but only if the target supports those operations
directly. Otherwise, it just bombs out and reverts to the normal
inefficient non-widening multiply.

This patch attempts to catch these cases and use an alternative signed
widening multiply instruction, if one of those is available.

I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).

OK?


This update fixes the testsuite issue Janis pointed out.


And this one fixes up the wmul-5.c testcase also, because the patch 
changes that test's expected result.


Andrew
2011-06-28  Andrew Stubbs  

	gcc/
	* Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency.
	* optabs.c (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this, and add new
	argument 'found_mode'.
	* optabs.h (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this.
	(find_widening_optab_handler): New macro.
	* tree-ssa-math-opts.c: Include langhooks.h
	(build_and_insert_cast): New function.
	(convert_mult_to_widen): Add new argument 'gsi'.
	Convert unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.
	(execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen.

	gcc/testsuite/
	* gcc.target/arm/wmul-5.c: Update expected result.
	* gcc.target/arm/wmul-6.c: New file.

--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
$(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
-   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h
+   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \
+   langhooks.h
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \
$(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
non-widening optabs also.  */
 
 enum insn_code
-find_widening_optab_handler (optab op, enum machine_mode to_mode,
-			 enum machine_mode from_mode,
-			 int permit_non_widening)
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+  enum machine_mode from_mode,
+  int permit_non_widening,
+  enum machine_mode *found_mode)
 {
   for (; (permit_non_widening || from_mode != to_mode)
 	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
@@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode,
 		   from_mode);
 
   if (handler != CODE_FOR_nothing)
-	return handler;
+	{
+	  if (found_mode)
+	*found_mode = from_mode;
+	  return handler;
+	}
 }
 
   return CODE_FOR_nothing;
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-extern enum insn_code find_widening_optab_handler (optab, enum machine_mode,
-		   enum machine_mode, int);
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+			enum machine_mode,
+			enum machine_mode,
+			int,
+			enum machine_mode *);
 
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
distinguish between machines with a vector shift that takes a scalar for the
--- a/gcc/testsuite/gcc.target/arm/wmul-5.c
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -7,4 +7,4 @@ foo (long long a, char *b, char *c)
   return a + *b * *c;
 }
 
-/* { dg-final { scan-assembler "umlal" } } */
+/* { dg-final { scan-assembler "smlalbb" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "basic-block.h"
 #include "targe

Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-06-28 Thread Paolo Bonzini

On 06/23/2011 04:41 PM, Andrew Stubbs wrote:


I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).


Yes, that's correct.

Paolo


Re: [PATCH (4/7)] Unsigned multiplies using wider signed multiplies

2011-06-28 Thread Andrew Stubbs

On 23/06/11 15:41, Andrew Stubbs wrote:

If one or both of the inputs to a widening multiply are of unsigned type
then the compiler will attempt to use usmul_widen_optab or
umul_widen_optab, respectively.

That works fine, but only if the target supports those operations
directly. Otherwise, it just bombs out and reverts to the normal
inefficient non-widening multiply.

This patch attempts to catch these cases and use an alternative signed
widening multiply instruction, if one of those is available.

I believe this should be legal as long as the top bit of both inputs is
guaranteed to be zero. The code achieves this guarantee by
zero-extending the inputs to a wider mode (which must still be narrower
than the output mode).

OK?


This update fixes the testsuite issue Janis pointed out.

Andrew
2011-06-28  Andrew Stubbs  

	gcc/
	* Makefile.in (tree-ssa-math-opts.o): Add langhooks.h dependency.
	* optabs.c (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this, and add new
	argument 'found_mode'.
	* optabs.h (find_widening_optab_handler): Rename to ...
	(find_widening_optab_handler_and_mode): ... this.
	(find_widening_optab_handler): New macro.
	* tree-ssa-math-opts.c: Include langhooks.h
	(build_and_insert_cast): New function.
	(convert_mult_to_widen): Add new argument 'gsi'.
	Convert unsupported unsigned multiplies to signed.
	(convert_plusminus_to_widen): Likewise.
	(execute_optimize_widening_mul): Pass gsi to convert_mult_to_widen.

	gcc/testsuite/
	* gcc.target/arm/wmul-6.c: New file.

--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -2672,7 +2672,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
 tree-ssa-math-opts.o : tree-ssa-math-opts.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(FLAGS_H) $(TREE_H) $(TREE_FLOW_H) $(TIMEVAR_H) \
$(TREE_PASS_H) alloc-pool.h $(BASIC_BLOCK_H) $(TARGET_H) \
-   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h
+   $(DIAGNOSTIC_H) $(RTL_H) $(EXPR_H) $(OPTABS_H) gimple-pretty-print.h \
+   langhooks.h
 tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) $(TREE_INLINE_H) $(FLAGS_H) \
$(FUNCTION_H) $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -232,9 +232,10 @@ add_equal_note (rtx insns, rtx target, enum rtx_code code, rtx op0, rtx op1)
non-widening optabs also.  */
 
 enum insn_code
-find_widening_optab_handler (optab op, enum machine_mode to_mode,
-			 enum machine_mode from_mode,
-			 int permit_non_widening)
+find_widening_optab_handler_and_mode (optab op, enum machine_mode to_mode,
+  enum machine_mode from_mode,
+  int permit_non_widening,
+  enum machine_mode *found_mode)
 {
   for (; (permit_non_widening || from_mode != to_mode)
 	 && GET_MODE_SIZE (from_mode) <= GET_MODE_SIZE (to_mode)
@@ -245,7 +246,11 @@ find_widening_optab_handler (optab op, enum machine_mode to_mode,
 		   from_mode);
 
   if (handler != CODE_FOR_nothing)
-	return handler;
+	{
+	  if (found_mode)
+	*found_mode = from_mode;
+	  return handler;
+	}
 }
 
   return CODE_FOR_nothing;
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -808,8 +808,13 @@ extern void emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 extern bool maybe_emit_unop_insn (enum insn_code, rtx, rtx, enum rtx_code);
 
 /* Find a widening optab even if it doesn't widen as much as we want.  */
-extern enum insn_code find_widening_optab_handler (optab, enum machine_mode,
-		   enum machine_mode, int);
+#define find_widening_optab_handler(A,B,C,D) \
+  find_widening_optab_handler_and_mode (A, B, C, D, NULL)
+extern enum insn_code find_widening_optab_handler_and_mode (optab,
+			enum machine_mode,
+			enum machine_mode,
+			int,
+			enum machine_mode *);
 
 /* An extra flag to control optab_for_tree_code's behavior.  This is needed to
distinguish between machines with a vector shift that takes a scalar for the
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-6.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, unsigned char *b, signed char *c)
+{
+  return a + (long long)*b * (long long)*c;
+}
+
+/* { dg-final { scan-assembler "smlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -98,6 +98,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "basic-block.h"
 #include "target.h"
 #include "gimple-pretty-print.h"
+#include "langhooks.h"
 
 /* FIXME: RTL headers have to be included here for optabs.  */
 #include "rtl.h"		/* Because optabs.h wants enum rtx_code.  */
@@ -1086,6 +1087,21 @@ build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
   return result;
 }
 
+/* Build a gimple assignment to cast VAL to TYPE, and put the result in
+   TARGET.  Insert the statement prior to GSI's current position