Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-10-03 Thread Paolo Bonzini
Il 30/09/2012 11:02, Richard Sandiford ha scritto:
> Uros Bizjak  writes:
>> On Thu, Sep 27, 2012 at 8:20 PM, Jakub Jelinek  wrote:
>>> On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
 After some off-line discussion with Richard, attached is v2 of the patch.

 2012-09-27  Uros Bizjak  

 PR rtl-optimization/54457
 * simplify-rtx.c (simplify_subreg):
   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>>>
>>> Is that a good idea even for WORD_REGISTER_OPERATIONS targets?
>>
>> I have bootstrapped and regtested [1] the patch on
>> alphaev68-pc-linux-gnu, a WORD_REGISTER_OPERATIONS target, and there
>> were no additional failures.
> 
> Thanks.  Given Jakub's question/concern, I'd really prefer a third
> opinion rather than approving it myself, but... OK if no-one objects
> within 24hrs.

I used to have a patch doing roughly the same thing (for more operations
but not MULT).  I never submitted because I didn't have the time to
audit all targets after changing the canonicalization.

Perhaps you can take the best of both worlds.

Paolo
change canonicalization

Index: gcc/Makefile.in
===
--- gcc/Makefile.in	(branch pr39726)
+++ gcc/Makefile.in	(working copy)
@@ -2844,7 +2844,7 @@ jump.o : jump.c $(CONFIG_H) $(SYSTEM_H) 
 simplify-rtx.o : simplify-rtx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
$(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) $(REAL_H) insn-config.h \
$(RECOG_H) $(EXPR_H) $(TOPLEV_H) output.h $(FUNCTION_H) $(GGC_H) $(TM_P_H) \
-   $(TREE_H) $(TARGET_H)
+   $(TREE_H) $(TARGET_H) $(OPTABS_H)
 cgraph.o : cgraph.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
langhooks.h $(TOPLEV_H) $(FLAGS_H) $(GGC_H) $(TARGET_H) $(CGRAPH_H) \
gt-cgraph.h output.h intl.h $(BASIC_BLOCK_H) debug.h $(HASHTAB_H) \
Index: gcc/simplify-rtx.c
===
--- gcc/simplify-rtx.c	(branch pr39726)
+++ gcc/simplify-rtx.c	(working copy)
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  
 #include "output.h"
 #include "ggc.h"
 #include "target.h"
+#include "optabs.h"
 
 /* Simplification and canonicalization of RTL.  */
 
@@ -5191,6 +5192,8 @@ rtx
 simplify_subreg (enum machine_mode outermode, rtx op,
 		 enum machine_mode innermode, unsigned int byte)
 {
+  int is_lowpart;
+
   /* Little bit of sanity checking.  */
   gcc_assert (innermode != VOIDmode);
   gcc_assert (outermode != VOIDmode);
@@ -5300,11 +5303,13 @@ simplify_subreg (enum machine_mode outer
   return NULL_RTX;
 }
 
+  is_lowpart = subreg_lowpart_offset (outermode, innermode) == byte;
+
   /* Merge implicit and explicit truncations.  */
 
   if (GET_CODE (op) == TRUNCATE
   && GET_MODE_SIZE (outermode) < GET_MODE_SIZE (innermode)
-  && subreg_lowpart_offset (outermode, innermode) == byte)
+  && is_lowpart)
 return simplify_gen_unary (TRUNCATE, outermode, XEXP (op, 0),
 			   GET_MODE (XEXP (op, 0)));
 
@@ -5343,7 +5348,7 @@ simplify_subreg (enum machine_mode outer
 	 The information is used only by alias analysis that can not
 	 grog partial register anyway.  */
 
-	  if (subreg_lowpart_offset (outermode, innermode) == byte)
+	  if (is_lowpart)
 	ORIGINAL_REGNO (x) = ORIGINAL_REGNO (op);
 	  return x;
 	}
@@ -5393,6 +5398,51 @@ simplify_subreg (enum machine_mode outer
   return NULL_RTX;
 }
 
+  /* Try to move a subreg inside an arithmetic operation.  */
+  if (is_lowpart && ARITHMETIC_P (op)
+  && GET_MODE_CLASS (outermode) == MODE_INT
+  && GET_MODE_CLASS (innermode) == MODE_INT)
+{
+  enum insn_code ic;
+  enum machine_mode cnt_mode;
+  switch (GET_CODE (op))
+	{
+	case ABS:
+	case NEG:
+	  return simplify_gen_unary (GET_CODE (op), outermode,
+ rtl_hooks.gen_lowpart_no_emit
+  (outermode, XEXP (op, 0)),
+ outermode);
+
+	case ASHIFT:
+	  /* i386 always uses QImode for the shift count.  Get the
+	 appropriate mode from the optab.  */
+	  ic = ashl_optab->handlers[outermode].insn_code;
+	  if (ic != CODE_FOR_nothing)
+	cnt_mode = insn_data[ic].operand[2].mode;
+	  else
+	cnt_mode = outermode;
+	  return simplify_gen_binary (GET_CODE (op), outermode,
+  rtl_hooks.gen_lowpart_no_emit
+   (outermode, XEXP (op, 0)),
+  rtl_hooks.gen_lowpart_no_emit
+   (cnt_mode, XEXP (op, 1)));
+
+	case PLUS:
+	case MINUS:
+	case AND:
+	case IOR:
+	case XOR:
+	  return simplify_gen_binary (GET_CODE (op), outermode,
+  rtl_hooks.gen_lowpart_no_emit
+   (outermode, XEXP (op, 0)),
+  rtl_hooks.gen_lowpart_no_emit
+   (outermode, XEXP (op, 1)));
+	default:
+	  break;
+	}
+}
+
   /* Optimize SUBREG truncations of zero and sign extended values.  */
   if ((GET_CODE (op) == ZERO_EXTEND
|| GET_CODE (op) == SIGN_EXTEND)
Index: gcc/combine.c

Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-10-02 Thread Richard Sandiford
Andrew Pinski  writes:
> On Thu, Sep 27, 2012 at 11:13 AM, Uros Bizjak  wrote:
>> 2012-09-27  Uros Bizjak  
>>
>> PR rtl-optimization/54457
>> * simplify-rtx.c (simplify_subreg):
>> Simplify (subreg:M (op:N ((x:N) (y:N)), 0)
>> to (op:M (subreg:M (x:N) 0) (subreg:M (x:N) 0)), where
>> the outer subreg is effectively a truncation to the original mode M.
>
>
> When I was doing something similar on our internal toolchain at
> Cavium.  I found doing this caused a regression on MIPS64 n32 in
> gcc.c-torture/execute/20040709-1.c Where:
>
>
> (insn 15 14 16 2 (set (reg/v:DI 200 [ y ])
> (reg:DI 2 $2)) t.c:16 301 {*movdi_64bit}
>  (expr_list:REG_DEAD (reg:DI 2 $2)
> (nil)))
>
> (insn 16 15 17 2 (set (reg:DI 210)
> (zero_extract:DI (reg/v:DI 200 [ y ])
> (const_int 29 [0x1d])
> (const_int 0 [0]))) t.c:16 249 {extzvdi}
>  (expr_list:REG_DEAD (reg/v:DI 200 [ y ])
> (nil)))
>
> (insn 17 16 23 2 (set (reg:SI 211)
> (truncate:SI (reg:DI 210))) t.c:16 175 {truncdisi2}
>  (expr_list:REG_DEAD (reg:DI 210)
> (nil)))
>
> Gets converted to:
> (insn 23 17 26 2 (set (reg/i:SI 2 $2)
> (and:SI (reg:SI 2 $2 [+4 ])
> (const_int 536870911 [0x1fff]))) t.c:18 156 {*andsi3}
>  (nil))
>
> Which is considered an ext instruction
>
> And with the Octeon simulator which causes undefined arguments to
> 32bit word operations to come out as 0xDEADBEEF which showed the
> regression.  I fixed it by changing it to produce TRUNCATE instead of
> the subreg.
>
> I did the simplification on ior/and rather than plus/minus/mult so the
> issue is only when expanding to this to and/ior.

Hmm, hadn't thought of that.  I think some of the existing subreg
optimisations suffer the same problem.  I.e. we can't assume that
subreg truncations of nested operands are OK just because the outer
subreg is OK.

I've got a patch I'm testing.

BTW, I haven't forgotten about your other ext patch.  Was hoping
to see whether we could finally take the opportunity to parameterise
the ext* patterns by mode, but got distracted with other patches.
Maybe I'll just have to admit I won't get time to try it for 4.8...

Richard


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-10-01 Thread Andrew Pinski
On Thu, Sep 27, 2012 at 11:13 AM, Uros Bizjak  wrote:
> On Thu, Sep 27, 2012 at 8:08 PM,   wrote:
>
>>> I agree (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C) is
>>> a good transformation, but why do we need to handle as special
>>> the case where the subreg is itself the operand of a plus or minus?
>>> I think it should happen regardless of where the subreg occurs.
>>
>> Don't we need to restrict this to the low part though?
>
> ...
>>>
>>> After some off-line discussion with Richard, attached is v2 of the patch.
>>>
>>> 2012-09-27  Uros Bizjak  
>>>
>>>PR rtl-optimization/54457
>>>* simplify-rtx.c (simplify_subreg):
>>>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>>> ...
>>
>> Is it just specific to DI -> SI, or is it for any large mode -> smaller 
>> mode, like SI -> HI?
>
> Oh, I just copied v1 ChangeLog. The patch converts all modes where
> size of mode M < size of mode N. Updated ChangeLog reads:
>
> 2012-09-27  Uros Bizjak  
>
> PR rtl-optimization/54457
> * simplify-rtx.c (simplify_subreg):
> Simplify (subreg:M (op:N ((x:N) (y:N)), 0)
> to (op:M (subreg:M (x:N) 0) (subreg:M (x:N) 0)), where
> the outer subreg is effectively a truncation to the original mode M.


When I was doing something similar on our internal toolchain at
Cavium.  I found doing this caused a regression on MIPS64 n32 in
gcc.c-torture/execute/20040709-1.c Where:


(insn 15 14 16 2 (set (reg/v:DI 200 [ y ])
(reg:DI 2 $2)) t.c:16 301 {*movdi_64bit}
 (expr_list:REG_DEAD (reg:DI 2 $2)
(nil)))

(insn 16 15 17 2 (set (reg:DI 210)
(zero_extract:DI (reg/v:DI 200 [ y ])
(const_int 29 [0x1d])
(const_int 0 [0]))) t.c:16 249 {extzvdi}
 (expr_list:REG_DEAD (reg/v:DI 200 [ y ])
(nil)))

(insn 17 16 23 2 (set (reg:SI 211)
(truncate:SI (reg:DI 210))) t.c:16 175 {truncdisi2}
 (expr_list:REG_DEAD (reg:DI 210)
(nil)))

Gets converted to:
(insn 23 17 26 2 (set (reg/i:SI 2 $2)
(and:SI (reg:SI 2 $2 [+4 ])
(const_int 536870911 [0x1fff]))) t.c:18 156 {*andsi3}
 (nil))

Which is considered an ext instruction

And with the Octeon simulator which causes undefined arguments to
32bit word operations to come out as 0xDEADBEEF which showed the
regression.  I fixed it by changing it to produce TRUNCATE instead of
the subreg.

I did the simplification on ior/and rather than plus/minus/mult so the
issue is only when expanding to this to and/ior.

Thanks,
Andrew Pinski



>
> testsuite/ChangeLog:
>
> 2012-09-27  Uros Bizjak  
>
> PR rtl-optimization/54457
> * gcc.target/i386/pr54457.c: New test.
>
> Uros.


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-30 Thread Richard Sandiford
Uros Bizjak  writes:
> On Thu, Sep 27, 2012 at 8:20 PM, Jakub Jelinek  wrote:
>> On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
>>> After some off-line discussion with Richard, attached is v2 of the patch.
>>>
>>> 2012-09-27  Uros Bizjak  
>>>
>>> PR rtl-optimization/54457
>>> * simplify-rtx.c (simplify_subreg):
>>>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>>
>> Is that a good idea even for WORD_REGISTER_OPERATIONS targets?
>
> I have bootstrapped and regtested [1] the patch on
> alphaev68-pc-linux-gnu, a WORD_REGISTER_OPERATIONS target, and there
> were no additional failures.

Thanks.  Given Jakub's question/concern, I'd really prefer a third
opinion rather than approving it myself, but... OK if no-one objects
within 24hrs.

Richard


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-28 Thread Uros Bizjak
On Thu, Sep 27, 2012 at 8:20 PM, Jakub Jelinek  wrote:
> On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
>> After some off-line discussion with Richard, attached is v2 of the patch.
>>
>> 2012-09-27  Uros Bizjak  
>>
>> PR rtl-optimization/54457
>> * simplify-rtx.c (simplify_subreg):
>>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>
> Is that a good idea even for WORD_REGISTER_OPERATIONS targets?

I have bootstrapped and regtested [1] the patch on
alphaev68-pc-linux-gnu, a WORD_REGISTER_OPERATIONS target, and there
were no additional failures.

[1] http://gcc.gnu.org/ml/gcc-testresults/2012-09/msg02828.html

Uros.


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-27 Thread Richard Sandiford
Jakub Jelinek  writes:
> On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
>> After some off-line discussion with Richard, attached is v2 of the patch.
>> 
>> 2012-09-27  Uros Bizjak  
>> 
>> PR rtl-optimization/54457
>> * simplify-rtx.c (simplify_subreg):
>>  Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>  to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>
> Is that a good idea even for WORD_REGISTER_OPERATIONS targets?

I think so.  Admittedly it means I'm going to have to change the
mips.md BADDU patterns from:

(define_insn "*baddu_si_el"
  [(set (match_operand:SI 0 "register_operand" "=d")
(zero_extend:SI
 (subreg:QI
  (plus:SI (match_operand:SI 1 "register_operand" "d")
   (match_operand:SI 2 "register_operand" "d")) 0)))]
  "ISA_HAS_BADDU && !BYTES_BIG_ENDIAN"
  "baddu\\t%0,%1,%2"
  [(set_attr "alu_type" "add")])

to:

(define_insn "*baddu_si_el"
  [(set (match_operand:SI 0 "register_operand" "=d")
(zero_extend:SI
 (plus:QI (match_operand:QI 1 "register_operand" "d")
  (match_operand:QI 2 "register_operand" "d"]
  "ISA_HAS_BADDU"
  "baddu\\t%0,%1,%2"
  [(set_attr "alu_type" "add")])

But really, that's a better representation even on MIPS,
not least because it gets rid of the ugly endianness condition.

There will probably be fallout on other targets too.
But the upside is that we get rid of more subregs from .mds.
I think a few of the i386.md define_peephole2s could go too.
E.g. the second two of:

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (plus:SI (match_operand:SI 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand"]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[1])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
   (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2
  (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (plus:SI (match_operand:SI 1 "nonmemory_operand")
   (match_operand:SI 2 "register_operand"]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && REGNO (operands[0]) == REGNO (operands[2])
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1
  (clobber (reg:CC FLAGS_REG))])])

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (subreg:SI (plus:DI (match_dup 0)
  (match_operand:DI 1 "nonmemory_operand")) 0)))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1
  (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_lowpart (SImode, operands[0]);
})

(define_peephole2
  [(set (match_operand:DI 0 "register_operand")
(zero_extend:DI
  (subreg:SI (plus:DI (match_operand:DI 1 "nonmemory_operand")
  (match_dup 0)) 0)))]
  "TARGET_64BIT && !TARGET_OPT_AGU
   && peep2_regno_dead_p (0, FLAGS_REG)"
  [(parallel [(set (match_dup 0)
   (zero_extend:DI (plus:SI (match_dup 2) (match_dup 1
  (clobber (reg:CC FLAGS_REG))])]
{
  operands[1] = gen_lowpart (SImode, operands[1]);
  operands[2] = gen_lowpart (SImode, operands[0]);
})

where we should now always generate the first two forms.

There's no semantic difference between the two rtxes, and I think
it would be confusing to have different canonical forms on different
targets.  If the caller really wants a word-mode operation on
WORD_REGISTER_OPERATIONS targets, then I think it's asking for
the wrong thing by taking this subreg.

Richard


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-27 Thread Jakub Jelinek
On Thu, Sep 27, 2012 at 08:04:58PM +0200, Uros Bizjak wrote:
> After some off-line discussion with Richard, attached is v2 of the patch.
> 
> 2012-09-27  Uros Bizjak  
> 
> PR rtl-optimization/54457
> * simplify-rtx.c (simplify_subreg):
>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).

Is that a good idea even for WORD_REGISTER_OPERATIONS targets?

> --- simplify-rtx.c(revision 191808)
> +++ simplify-rtx.c(working copy)
> @@ -5689,6 +5689,28 @@ simplify_subreg (enum machine_mode outermode, rtx
>   return CONST0_RTX (outermode);
>  }
>  
> +  /* Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
> + to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)), where
> + the outer subreg is effectively a truncation to the original mode.  */
> +  if ((GET_CODE (op) == PLUS
> +   || GET_CODE (op) == MINUS
> +   || GET_CODE (op) == MULT)
> +  && SCALAR_INT_MODE_P (outermode)
> +  && SCALAR_INT_MODE_P (innermode)
> +  && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
> +  && byte == subreg_lowpart_offset (outermode, innermode))
> +{
> +  rtx op0 = simplify_gen_subreg (outermode, XEXP (op, 0),
> + innermode, byte);
> +  if (op0)
> +{
> +  rtx op1 = simplify_gen_subreg (outermode, XEXP (op, 1),
> + innermode, byte);
> +  if (op1)
> +return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
> +}
> +}
> +
>/* Simplify (subreg:QI (lshiftrt:SI (sign_extend:SI (x:QI)) C), 0) into
>   to (ashiftrt:QI (x:QI) C), where C is a suitable small constant and
>   the outer subreg is effectively a truncation to the original mode.  */

Jakub


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-27 Thread Uros Bizjak
On Thu, Sep 27, 2012 at 8:08 PM,   wrote:

>> I agree (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C) is
>> a good transformation, but why do we need to handle as special
>> the case where the subreg is itself the operand of a plus or minus?
>> I think it should happen regardless of where the subreg occurs.
>
> Don't we need to restrict this to the low part though?

 ...
>>
>> After some off-line discussion with Richard, attached is v2 of the patch.
>>
>> 2012-09-27  Uros Bizjak  
>>
>>PR rtl-optimization/54457
>>* simplify-rtx.c (simplify_subreg):
>>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
>> ...
>
> Is it just specific to DI -> SI, or is it for any large mode -> smaller mode, 
> like SI -> HI?

Oh, I just copied v1 ChangeLog. The patch converts all modes where
size of mode M < size of mode N. Updated ChangeLog reads:

2012-09-27  Uros Bizjak  

PR rtl-optimization/54457
* simplify-rtx.c (simplify_subreg):
Simplify (subreg:M (op:N ((x:N) (y:N)), 0)
to (op:M (subreg:M (x:N) 0) (subreg:M (x:N) 0)), where
the outer subreg is effectively a truncation to the original mode M.

testsuite/ChangeLog:

2012-09-27  Uros Bizjak  

PR rtl-optimization/54457
* gcc.target/i386/pr54457.c: New test.

Uros.


Re: [PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-27 Thread Paul_Koning

On Sep 27, 2012, at 2:04 PM, Uros Bizjak wrote:

> 
> 
> 
> I agree (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C) is
> a good transformation, but why do we need to handle as special
> the case where the subreg is itself the operand of a plus or minus?
> I think it should happen regardless of where the subreg occurs.
 
 Don't we need to restrict this to the low part though?
>>> 
>>> ...
> 
> After some off-line discussion with Richard, attached is v2 of the patch.
> 
> 2012-09-27  Uros Bizjak  
> 
>PR rtl-optimization/54457
>* simplify-rtx.c (simplify_subreg):
>   Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
>   to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).
> ...

Is it just specific to DI -> SI, or is it for any large mode -> smaller mode, 
like SI -> HI?

paul




[PATCH v2, rtl-optimization]: Fix PR54457, [x32] Fail to combine 64bit index + constant

2012-09-27 Thread Uros Bizjak
On Thu, Sep 27, 2012 at 4:25 PM, Richard Sandiford
 wrote:

 I agree (subreg:M (op:N A C) 0) to (op:M (subreg:N (A 0)) C) is
 a good transformation, but why do we need to handle as special
 the case where the subreg is itself the operand of a plus or minus?
 I think it should happen regardless of where the subreg occurs.
>>>
>>> Don't we need to restrict this to the low part though?
>>
>> I have tried this approach with attached patch.  Unfortunately,
>> although it survived bootstrap without libjava on x86_64, it failed
>> building libjava with:
>>
>> /home/uros/gcc-svn/trunk/libjava/classpath/javax/swing/plaf/basic/BasicSliderUI.java:1299:0:
>> error: insn does not satisfy its constraints:
>>}
>>  ^
>> (insn 237 398 399 7 (set (reg:SI 1 dx [125])
>> (plus:SI (subreg:SI (mult:DI (reg:DI 1 dx [orig:72 D.78627 ] [72])
>> (const_int 2 [0x2])) 0)
>> (reg:SI 5 di)))
>> /home/uros/gcc-svn/trunk/libjava/classpath/javax/swing/plaf/basic/BasicSliderUI.java:1271
>> 240 {*leasi}
>>  (expr_list:REG_DEAD (reg:DI 5 di)
>> (nil)))
>>
>> Original RTX was (subreg:SI (plus:DI (mult:DI (...) reg:DI))), which
>> is valid RTX pattern for lea insn, the above is not.
>>
>> Due to these problems, I think the safer approach is to limit the
>> transformation to (plus:SI (subreg:SI (plus:DI (...) 0)) RTXes, as was
>> the case with original patch. This approach would fix a specific
>> problem where simplify_plus_minus is not able to simplify the combined
>> RTX at combine time. Please note, that combined RTXes are always
>> checked for correctness at combine pass.
>
> I think instead the (subreg (plus ...)) handling should be applied
> to (subreg (mult ...)) too.  IMO the correct form of the above address
> ought to be:
>
> (set (reg:SI 1 dx [125])
>  (plus:SI (mult:SI (reg:SI 1 dx [orig:72 D.78627 ] [72])
>(const_int 2 [0x2]))
>   (reg:SI 5 di))

Great, this works as expected!

After some off-line discussion with Richard, attached is v2 of the patch.

2012-09-27  Uros Bizjak  

PR rtl-optimization/54457
* simplify-rtx.c (simplify_subreg):
Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)).

testsuite/ChangeLog:

2012-09-27  Uros Bizjak  

PR rtl-optimization/54457
* gcc.target/i386/pr54457.c: New test.

Patch was bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32}.

BTW: I propose that we start with limited selection of opcodes, so x32
autotester will pick and test the patch with SImode addresses.

OK for mainline?

Uros.
Index: simplify-rtx.c
===
--- simplify-rtx.c  (revision 191808)
+++ simplify-rtx.c  (working copy)
@@ -5689,6 +5689,28 @@ simplify_subreg (enum machine_mode outermode, rtx
return CONST0_RTX (outermode);
 }
 
+  /* Simplify (subreg:SI (op:DI ((x:DI) (y:DI)), 0)
+ to (op:SI (subreg:SI (x:DI) 0) (subreg:SI (x:DI) 0)), where
+ the outer subreg is effectively a truncation to the original mode.  */
+  if ((GET_CODE (op) == PLUS
+   || GET_CODE (op) == MINUS
+   || GET_CODE (op) == MULT)
+  && SCALAR_INT_MODE_P (outermode)
+  && SCALAR_INT_MODE_P (innermode)
+  && GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode)
+  && byte == subreg_lowpart_offset (outermode, innermode))
+{
+  rtx op0 = simplify_gen_subreg (outermode, XEXP (op, 0),
+ innermode, byte);
+  if (op0)
+{
+  rtx op1 = simplify_gen_subreg (outermode, XEXP (op, 1),
+ innermode, byte);
+  if (op1)
+return simplify_gen_binary (GET_CODE (op), outermode, op0, op1);
+}
+}
+
   /* Simplify (subreg:QI (lshiftrt:SI (sign_extend:SI (x:QI)) C), 0) into
  to (ashiftrt:QI (x:QI) C), where C is a suitable small constant and
  the outer subreg is effectively a truncation to the original mode.  */
Index: testsuite/gcc.target/i386/pr54457.c
===
--- testsuite/gcc.target/i386/pr54457.c (revision 0)
+++ testsuite/gcc.target/i386/pr54457.c (working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-options "-O2 -mx32 -maddress-mode=short" } */
+
+extern char array[40];
+
+char foo (long long position)
+{
+  return array[position + 1];
+}
+
+/* { dg-final { scan-assembler-not "add\[lq\]?\[^\n\]*1" } } */