Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-08 Thread Eric Botcazou
 Committed after private email approval from Jakub.  I made one
 additional trivial change (missing whitespace in a comment).

This breaks bootstrap with RTL checking enabled:

/home/eric/svn/gcc/libgcc/config/libbid/bid64_noncomp.c:119:1: internal 
compiler error: RTL check: expected code 'set' or 'clobber', have 'parallel' 
in combine_reaching_defs, at ree.c:711
 }
 ^
0x9c5fcf rtl_check_failed_code2(rtx_def const*, rtx_code, rtx_code, char 
const*, int, char const*)
/home/eric/svn/gcc/gcc/rtl.c:783
0x14626da combine_reaching_defs
/home/eric/svn/gcc/gcc/ree.c:711
0x1464ae9 find_and_remove_re
/home/eric/svn/gcc/gcc/ree.c:957
0x1464ae9 rest_of_handle_ree
/home/eric/svn/gcc/gcc/ree.c:1019
0x1464ae9 execute
/home/eric/svn/gcc/gcc/ree.c:1058
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See http://gcc.gnu.org/bugs.html for instructions.
make[3]: *** [bid64_noncomp.o] Error 1
make[3]: *** Waiting for unfinished jobs

-- 
Eric Botcazou


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-08 Thread Jeff Law

On 01/08/14 01:14, Eric Botcazou wrote:

Committed after private email approval from Jakub.  I made one
additional trivial change (missing whitespace in a comment).


This breaks bootstrap with RTL checking enabled:

[ ... ]
Thanks.  I'm on it.
jeff



Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-08 Thread Jeff Law

On 01/08/14 01:14, Eric Botcazou wrote:

Committed after private email approval from Jakub.  I made one
additional trivial change (missing whitespace in a comment).


This breaks bootstrap with RTL checking enabled:

/home/eric/svn/gcc/libgcc/config/libbid/bid64_noncomp.c:119:1: internal
compiler error: RTL check: expected code 'set' or 'clobber', have 'parallel'
in combine_reaching_defs, at ree.c:711
There were two issues in that code.  The first assumed the form of 
DEF_INSN was (set (dest) (src)), the second assumed that the destination 
must be a reg before checking its REGNO.


ree.c already had some code which effectively defined the form that the 
defining insn could take.  It's not quite single_set, though I'd 
really prefer that be the form in the future.  Anyway, I pulled that 
code out of merge_def_and_ext so that it could also be used by 
combine_reaching_defs.


With that I was able to bootstrap  regression test with 
--enable-checking=rtl as well as a normal bootstrap and regression test 
on x86_64-unknown-linux-gnu.


OK for the trunk?


* ree.c (get_sub_rtx): New function, extracted from...
(merge_def_and_ext): Here.
(combine_reaching_defs): Use get_sub_rtx.


diff --git a/gcc/ree.c b/gcc/ree.c
index ec09c7a..b41e891 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -580,27 +580,17 @@ make_defs_and_copies_lists (rtx extend_insn, const_rtx 
set_pat,
   return ret;
 }
 
-/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
-   on the SET pattern.  */
-
-static bool
-merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+static rtx *
+get_sub_rtx (rtx def_insn)
 {
-  enum machine_mode ext_src_mode;
-  enum rtx_code code;
-  rtx *sub_rtx;
-  rtx s_expr;
-  int i;
-
-  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand-expr), 0));
-  code = GET_CODE (PATTERN (def_insn));
-  sub_rtx = NULL;
+  enum rtx_code code = GET_CODE (PATTERN (def_insn));
+  rtx *sub_rtx = NULL;
 
   if (code == PARALLEL)
 {
-  for (i = 0; i  XVECLEN (PATTERN (def_insn), 0); i++)
+  for (int i = 0; i  XVECLEN (PATTERN (def_insn), 0); i++)
 {
-  s_expr = XVECEXP (PATTERN (def_insn), 0, i);
+  rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
   if (GET_CODE (s_expr) != SET)
 continue;
 
@@ -609,7 +599,7 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state 
*state)
   else
 {
   /* PARALLEL with multiple SETs.  */
-  return false;
+  return NULL;
 }
 }
 }
@@ -618,10 +608,27 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, 
ext_state *state)
   else
 {
   /* It is not a PARALLEL or a SET, what could it be ? */
-  return false;
+  return NULL;
 }
 
   gcc_assert (sub_rtx != NULL);
+  return sub_rtx;
+}
+
+/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
+   on the SET pattern.  */
+
+static bool
+merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+{
+  enum machine_mode ext_src_mode;
+  rtx *sub_rtx;
+
+  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand-expr), 0));
+  sub_rtx = get_sub_rtx (def_insn);
+
+  if (sub_rtx == NULL)
+return false;
 
   if (REG_P (SET_DEST (*sub_rtx))
(GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
@@ -707,8 +714,13 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   /* If there is an overlap between the destination of DEF_INSN and
 CAND-insn, then this transformation is not safe.  Note we have
 to test in the widened mode.  */
+  rtx *dest_sub_rtx = get_sub_rtx (def_insn);
+  if (dest_sub_rtx == NULL
+ || !REG_P (SET_DEST (*dest_sub_rtx)))
+   return false;
+
   rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand-insn))),
-REGNO (SET_DEST (PATTERN (def_insn;
+REGNO (SET_DEST (*dest_sub_rtx)));
   if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand-insn
return false;
 


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-08 Thread Jakub Jelinek
On Wed, Jan 08, 2014 at 04:02:17PM -0700, Jeff Law wrote:
   * ree.c (get_sub_rtx): New function, extracted from...
   (merge_def_and_ext): Here.
   (combine_reaching_defs): Use get_sub_rtx.

 --- a/gcc/ree.c
 +++ b/gcc/ree.c
 @@ -580,27 +580,17 @@ make_defs_and_copies_lists (rtx extend_insn, const_rtx 
 set_pat,
return ret;
  }
  
 -/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
 -   on the SET pattern.  */
 -
 -static bool
 -merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
 +static rtx *
 +get_sub_rtx (rtx def_insn)

Please add a function comment for it (perhaps saying that it is like
single_set but never allows more than one SET).

Ok with that change.

Jakub


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-08 Thread Jeff Law

On 01/08/14 16:13, Jakub Jelinek wrote:


Please add a function comment for it (perhaps saying that it is like
single_set but never allows more than one SET).

Ok with that change.

My bad.  Thanks for catching it.

Attaching the installed patch for reference.

Jeff
commit 12e467a657653de484178f0d0d66786a92d62ebe
Author: law law@138bc75d-0d04-0410-961f-82ee72b054a4
Date:   Thu Jan 9 04:42:38 2014 +

* ree.c (get_sub_rtx): New function, extracted from...
(merge_def_and_ext): Here.
(combine_reaching_defs): Use get_sub_rtx.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@206454 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f63918e..e4872f2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2014-01-08  Jeff Law  l...@redhat.com
+
+   * ree.c (get_sub_rtx): New function, extracted from...
+   (merge_def_and_ext): Here.
+   (combine_reaching_defs): Use get_sub_rtx.
+
 2014-01-08  Eric Botcazou  ebotca...@adacore.com
 
* cgraph.h (varpool_variable_node): Do not choke on null node.
diff --git a/gcc/ree.c b/gcc/ree.c
index ec09c7a..1c4f3ad 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -580,27 +580,21 @@ make_defs_and_copies_lists (rtx extend_insn, const_rtx 
set_pat,
   return ret;
 }
 
-/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
-   on the SET pattern.  */
-
-static bool
-merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+/* If DEF_INSN has single SET expression, possibly buried inside
+   a PARALLEL, return the address of the SET expression, else
+   return NULL.  This is similar to single_set, except that
+   single_set allows multiple SETs when all but one is dead.  */
+static rtx *
+get_sub_rtx (rtx def_insn)
 {
-  enum machine_mode ext_src_mode;
-  enum rtx_code code;
-  rtx *sub_rtx;
-  rtx s_expr;
-  int i;
-
-  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand-expr), 0));
-  code = GET_CODE (PATTERN (def_insn));
-  sub_rtx = NULL;
+  enum rtx_code code = GET_CODE (PATTERN (def_insn));
+  rtx *sub_rtx = NULL;
 
   if (code == PARALLEL)
 {
-  for (i = 0; i  XVECLEN (PATTERN (def_insn), 0); i++)
+  for (int i = 0; i  XVECLEN (PATTERN (def_insn), 0); i++)
 {
-  s_expr = XVECEXP (PATTERN (def_insn), 0, i);
+  rtx s_expr = XVECEXP (PATTERN (def_insn), 0, i);
   if (GET_CODE (s_expr) != SET)
 continue;
 
@@ -609,7 +603,7 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state 
*state)
   else
 {
   /* PARALLEL with multiple SETs.  */
-  return false;
+  return NULL;
 }
 }
 }
@@ -618,10 +612,27 @@ merge_def_and_ext (ext_cand *cand, rtx def_insn, 
ext_state *state)
   else
 {
   /* It is not a PARALLEL or a SET, what could it be ? */
-  return false;
+  return NULL;
 }
 
   gcc_assert (sub_rtx != NULL);
+  return sub_rtx;
+}
+
+/* Merge the DEF_INSN with an extension.  Calls combine_set_extension
+   on the SET pattern.  */
+
+static bool
+merge_def_and_ext (ext_cand *cand, rtx def_insn, ext_state *state)
+{
+  enum machine_mode ext_src_mode;
+  rtx *sub_rtx;
+
+  ext_src_mode = GET_MODE (XEXP (SET_SRC (cand-expr), 0));
+  sub_rtx = get_sub_rtx (def_insn);
+
+  if (sub_rtx == NULL)
+return false;
 
   if (REG_P (SET_DEST (*sub_rtx))
(GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
@@ -707,8 +718,13 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   /* If there is an overlap between the destination of DEF_INSN and
 CAND-insn, then this transformation is not safe.  Note we have
 to test in the widened mode.  */
+  rtx *dest_sub_rtx = get_sub_rtx (def_insn);
+  if (dest_sub_rtx == NULL
+ || !REG_P (SET_DEST (*dest_sub_rtx)))
+   return false;
+
   rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand-insn))),
-REGNO (SET_DEST (PATTERN (def_insn;
+REGNO (SET_DEST (*dest_sub_rtx)));
   if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand-insn
return false;
 


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2014-01-07 Thread Jeff Law

On 12/20/13 13:44, Jeff Law wrote:

On 12/20/13 10:25, Jakub Jelinek wrote:

Yes.  So my suggestion actually was not correct for that:
!reg_overlap_mentioned_p (dest, XEXP (src, 0))
because the first extension above has r1:SI and r2:DI which don't
overlap, only r1:DI and r2:DI overlap.  So it probably should be checked
in combine_reaching_defs instead where you have already both the
registers
in the right modes available and can call reg_overlap_mentioned_p on them
directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
(cand-insn), right?

Here's the updated version.

1. Minor test tweak per Uros's suggestion.
2. Fix formatting
3. Add testing for two destinations overlapping per above.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  Ok for
the trunk?
Committed after private email approval from Jakub.  I made one 
additional trivial change (missing whitespace in a comment).


jeff


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Uros Bizjak
Hello!

index 000..5375b61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53623.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { x86_64-*-* } } } */
+/* { dg-options -O2 -fdump-rtl-ree } */

Please use:

/* { dg-do compile { target { ! ia32 } } } */

Uros.


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jakub Jelinek
On Thu, Dec 19, 2013 at 09:57:36PM -0700, Jeff Law wrote:
   * ree.c (combine_set_extension): Handle case where source
   and destination registers in an extension insn are different.
   (combine_reaching_defs): Allow source and destination
   registers in extension to be different under limited
   circumstances.
   (add_removable_extension): Remove restriction that the
   source and destination registers in the extension are the
   same.
   (find_and_remove_re): Emit a copy from the extension's
   destination to its source after the defining insn if
   the source and destination registers are different.
 
 
 testsuite/
 
   * gcc.target/i386/pr53623.c: New test.

Thanks for working on this, the only thing I'd worry about are
HARD_REGNO_NREGS  1 registers if the two hard regs might overlap.
Perhaps it is fine as is and dunno how many targets actually allow partial
overlap in between the multi-register REGs.  If you aren't sure this
would be handled properly, perhaps the check could go to
add_removable_extension below the REG_P check add
   !reg_overlap_mentioned_p (dest, XEXP (src, 0))

 diff --git a/gcc/ree.c b/gcc/ree.c
 index 9938e98..63ad86c 100644
 --- a/gcc/ree.c
 +++ b/gcc/ree.c
 @@ -282,9 +282,21 @@ static bool
  combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
  {
rtx orig_src = SET_SRC (*orig_set);
 -  rtx new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (*orig_set)));
rtx new_set;
  
 +  /* If the extension's source/destination registers are not the same
 + then we need to change the original load to reference the destination
 + of the extension.  Then we need to emit a copy from that destination
 + to the original destination of the load.  */
 +  rtx new_reg;
 +  bool copy_needed
 += REGNO (SET_DEST (PATTERN (cand-insn)))
 +  != REGNO (XEXP (SET_SRC (PATTERN (cand-insn)), 0));

Perhaps the right formatting here would be
  bool copy_needed
= (REGNO (SET_DEST (PATTERN (cand-insn)))
   != REGNO (XEXP (SET_SRC (PATTERN (cand-insn)), 0)));
? ()s for emacs, and aligning != under REGNO.

 +  if (copy_needed)
 +new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (PATTERN 
 (cand-insn;

Too long line.

Looks good to me otherwise.

Jakub


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jeff Law

On 12/20/13 01:17, Uros Bizjak wrote:

Hello!

index 000..5375b61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr53623.c
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { x86_64-*-* } } } */
+/* { dg-options -O2 -fdump-rtl-ree } */

Please use:

/* { dg-do compile { target { ! ia32 } } } */

Done.

jeff


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jeff Law

On 12/20/13 01:24, Jakub Jelinek wrote:


Thanks for working on this, the only thing I'd worry about are
HARD_REGNO_NREGS  1 registers if the two hard regs might overlap.
The reg_set_between_p and reg_used_between_p calls when you dig down 
into them eventually use reg_overlap_mentioned_p which should do the 
right thing in this regard.


I'll audit ree for problems of this nature.



+  /* If the extension's source/destination registers are not the same
+ then we need to change the original load to reference the destination
+ of the extension.  Then we need to emit a copy from that destination
+ to the original destination of the load.  */
+  rtx new_reg;
+  bool copy_needed
+= REGNO (SET_DEST (PATTERN (cand-insn)))
+!= REGNO (XEXP (SET_SRC (PATTERN (cand-insn)), 0));


Perhaps the right formatting here would be
   bool copy_needed
 = (REGNO (SET_DEST (PATTERN (cand-insn)))
!= REGNO (XEXP (SET_SRC (PATTERN (cand-insn)), 0)));
? ()s for emacs, and aligning != under REGNO.

Agreed.  I was factoring out that test and didn't fix the formatting.




+  if (copy_needed)
+new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (PATTERN 
(cand-insn;


Too long line.

Ugh.  80 exactly.  There are times I hate our conventions :(

jeff


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jakub Jelinek
On Fri, Dec 20, 2013 at 09:26:10AM -0700, Jeff Law wrote:
 Thanks for working on this, the only thing I'd worry about are
 HARD_REGNO_NREGS  1 registers if the two hard regs might overlap.
 The reg_set_between_p and reg_used_between_p calls when you dig down
 into them eventually use reg_overlap_mentioned_p which should do the
 right thing in this regard.
 
 I'll audit ree for problems of this nature.

The two reg_*_between_p functions check only insns in between the two, but
not the insns themselves.  What I meant is if it e.g. couldn't be possible
to have HARD_REGNO_NREGS == 2 registers say 1 and 2, where the first insn
would load into the low half of 1, then one insn that say sign extends
1 into {2,3}, then perhaps {2,3} is used and finally 1 is zero extended
into {1,2} and that is used later on.  For little endian this would work,
while after the transformation which sign extends the memory into {2,3}
and then copies that into {1,2} (thus overwriting content of low half of
{2,3} with the high half of it).  Perhaps the RA will never allow this.

Jakub


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jeff Law

On 12/20/13 09:45, Jakub Jelinek wrote:

On Fri, Dec 20, 2013 at 09:26:10AM -0700, Jeff Law wrote:

Thanks for working on this, the only thing I'd worry about are
HARD_REGNO_NREGS  1 registers if the two hard regs might overlap.

The reg_set_between_p and reg_used_between_p calls when you dig down
into them eventually use reg_overlap_mentioned_p which should do the
right thing in this regard.

I'll audit ree for problems of this nature.


The two reg_*_between_p functions check only insns in between the two, but
not the insns themselves.  What I meant is if it e.g. couldn't be possible
to have HARD_REGNO_NREGS == 2 registers say 1 and 2, where the first insn
would load into the low half of 1, then one insn that say sign extends
1 into {2,3}, then perhaps {2,3} is used and finally 1 is zero extended
into {1,2} and that is used later on.  For little endian this would work,
while after the transformation which sign extends the memory into {2,3}
and then copies that into {1,2} (thus overwriting content of low half of
{2,3} with the high half of it).  Perhaps the RA will never allow this.
ISTM if we're presented with something like that (and I don't think 
there's anything in RA which explicitly disallows such code), then what 
we have to evaluate is whether or not the transformation preserves the 
semantics.


So, incoming would look like this (assuming a 32 bit target):


r1:SI = mem:SI
r2:DI = sext:DI (r1:SI)
[ Use r2/r3 ]
r1:DI = zext:DI (r1:SI)

And that would be transformed into:

r2:DI = sext:DI (mem:SI)
r1:DI = r2:DI
[ Use r2/r3 ]
r1:DI = zext:DI (r1:SI)

Where r2 will have the wrong value in the use statements.  ISTM we can 
check for an overlap between the destination of the memory load and the 
destination of the first extension.  Right?


Is that the case you're worrying about?

jeff


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jakub Jelinek
On Fri, Dec 20, 2013 at 10:17:10AM -0700, Jeff Law wrote:
 ISTM if we're presented with something like that (and I don't think
 there's anything in RA which explicitly disallows such code), then
 what we have to evaluate is whether or not the transformation
 preserves the semantics.
 
 So, incoming would look like this (assuming a 32 bit target):
 
 
 r1:SI = mem:SI
 r2:DI = sext:DI (r1:SI)
 [ Use r2/r3 ]
 r1:DI = zext:DI (r1:SI)
 
 And that would be transformed into:
 
 r2:DI = sext:DI (mem:SI)
 r1:DI = r2:DI
 [ Use r2/r3 ]
 r1:DI = zext:DI (r1:SI)
 
 Where r2 will have the wrong value in the use statements.  ISTM we
 can check for an overlap between the destination of the memory load
 and the destination of the first extension.  Right?
 
 Is that the case you're worrying about?

Yes.  So my suggestion actually was not correct for that:
   !reg_overlap_mentioned_p (dest, XEXP (src, 0))
because the first extension above has r1:SI and r2:DI which don't
overlap, only r1:DI and r2:DI overlap.  So it probably should be checked
in combine_reaching_defs instead where you have already both the registers
in the right modes available and can call reg_overlap_mentioned_p on them
directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
(cand-insn), right?

Jakub


Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jeff Law

On 12/20/13 10:25, Jakub Jelinek wrote:
  So it probably should be checked

in combine_reaching_defs instead where you have already both the registers
in the right modes available and can call reg_overlap_mentioned_p on them
directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
(cand-insn), right?
Certainly my preference is to have all the tests for this exception live 
in combine_reaching_defs.


We actually need to test in the widened mode, so we have to generate a 
new reg expressing SET_DEST (def_insn) in the widened mode.  The other 
argument for the reg_overlap_mentioned_p call is SET_DEST (cand-insn).


I find myself wondering if we should be using the widened mode register 
for the calls to reg_{used,set}_between_p calls.  I'll have to ponder 
that for a few minutes.


jeff




Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jeff Law

On 12/20/13 10:25, Jakub Jelinek wrote:

Yes.  So my suggestion actually was not correct for that:
!reg_overlap_mentioned_p (dest, XEXP (src, 0))
because the first extension above has r1:SI and r2:DI which don't
overlap, only r1:DI and r2:DI overlap.  So it probably should be checked
in combine_reaching_defs instead where you have already both the registers
in the right modes available and can call reg_overlap_mentioned_p on them
directly.  One argument would be SET_DEST (def_insn) and one SET_DEST
(cand-insn), right?

Here's the updated version.

1. Minor test tweak per Uros's suggestion.
2. Fix formatting
3. Add testing for two destinations overlapping per above.

Bootstrapped and regression tested on x86_64-unknown-linux-gnu.  Ok for 
the trunk?


jeff

* ree.c (combine_set_extension): Handle case where source
and destination registers in an extension insn are different.
(combine_reaching_defs): Allow source and destination
registers in extension to be different under limited
circumstances.
(add_removable_extension): Remove restriction that the
source and destination registers in the extension are the
same.
(find_and_remove_re): Emit a copy from the extension's
destination to its source after the defining insn if
the source and destination registers are different.


testsuite/

* gcc.target/i386/pr53623.c: New test.


diff --git a/gcc/ree.c b/gcc/ree.c
index 9938e98..873b6d4 100644
--- a/gcc/ree.c
+++ b/gcc/ree.c
@@ -282,8 +282,20 @@ static bool
 combine_set_extension (ext_cand *cand, rtx curr_insn, rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
-  rtx new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (*orig_set)));
   rtx new_set;
+  rtx cand_pat = PATTERN (cand-insn);
+
+  /* If the extension's source/destination registers are not the same
+ then we need to change the original load to reference the destination
+ of the extension.  Then we need to emit a copy from that destination
+ to the original destination of the load.  */
+  rtx new_reg;
+  bool copy_needed
+= (REGNO (SET_DEST (cand_pat)) != REGNO (XEXP (SET_SRC (cand_pat), 0)));
+  if (copy_needed)
+new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (cand_pat)));
+  else
+new_reg = gen_rtx_REG (cand-mode, REGNO (SET_DEST (*orig_set)));
 
   /* Merge constants by directly moving the constant into the register under
  some conditions.  Recall that RTL constants are sign-extended.  */
@@ -342,7 +354,8 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx 
*orig_set)
   if (dump_file)
 {
   fprintf (dump_file,
-  Tentatively merged extension with definition:\n);
+  Tentatively merged extension with definition %s:\n,
+  (copy_needed) ? (copy needed) : );
   print_rtl_single (dump_file, curr_insn);
 }
   return true;
@@ -662,6 +675,53 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   if (!outcome)
 return false;
 
+  /* If the destination operand of the extension is a different
+ register than the source operand, then additional restrictions
+ are needed.  */
+  if ((REGNO (SET_DEST (PATTERN (cand-insn)))
+   != REGNO (XEXP (SET_SRC (PATTERN (cand-insn)), 0
+{
+  /* In theory we could handle more than one reaching def, it
+just makes the code to update the insn stream more complex.  */
+  if (state-defs_list.length () != 1)
+   return false;
+
+  /* We require the candidate not already be modified.  This may
+be overly conservative.  */
+  if (state-modified[INSN_UID (cand-insn)].kind != EXT_MODIFIED_NONE)
+   return false;
+
+  /* There's only one reaching def.  */
+  rtx def_insn = state-defs_list[0];
+
+  /* The defining statementmust not have been modified either.  */
+  if (state-modified[INSN_UID (def_insn)].kind != EXT_MODIFIED_NONE)
+   return false;
+
+  /* The defining statement and candidate insn must be in the same block.
+This is merely to keep the test for safety and updating the insn
+stream simple.  */
+  if (BLOCK_FOR_INSN (cand-insn) != BLOCK_FOR_INSN (def_insn))
+   return false;
+
+  /* If there is an overlap between the destination of DEF_INSN and
+CAND-insn, then this transformation is not safe.  Note we have
+to test in the widened mode.  */
+  rtx tmp_reg = gen_rtx_REG (GET_MODE (SET_DEST (PATTERN (cand-insn))),
+REGNO (SET_DEST (PATTERN (def_insn;
+  if (reg_overlap_mentioned_p (tmp_reg, SET_DEST (PATTERN (cand-insn
+   return false;
+
+  /* The destination register of the extension insn must not be
+used or set between the def_insn and cand-insn exclusive.  */
+  if (reg_used_between_p (SET_DEST (PATTERN (cand-insn)),
+ def_insn, cand-insn)

Re: [RFA][PATCH][middle-end/53623] Improve extension elimination

2013-12-20 Thread Jakub Jelinek
On Fri, Dec 20, 2013 at 01:44:06PM -0700, Jeff Law wrote:
 @@ -342,7 +354,8 @@ combine_set_extension (ext_cand *cand, rtx curr_insn, rtx 
 *orig_set)
if (dump_file)
  {
fprintf (dump_file,
 -Tentatively merged extension with definition:\n);
 +Tentatively merged extension with definition %s:\n,
 +(copy_needed) ? (copy needed) : );

Missed this, you don't need the parens around the first copy_needed.

Ok with that change, thanks.

Jakub