[gcc r14-10853] aarch64: Assume alias conflict if common address reg changes [PR116783]

2024-10-30 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:434483ac32a08d1f3608c26fe2da302f0e09d6a2

commit r14-10853-g434483ac32a08d1f3608c26fe2da302f0e09d6a2
Author: Alex Coplan 
Date:   Wed Oct 30 13:46:12 2024 +

aarch64: Assume alias conflict if common address reg changes [PR116783]

As the PR shows, pair fusion was tricking memory_modified_in_insn_p into
returning false when a common base register (in this case, x1) was
modified between the mem and the store insn.  This lead to wrong code as
the accesses really did alias.

To avoid this sort of problem, this patch avoids invoking RTL alias
analysis altogether (and assume an alias conflict) if the two insns to
be compared share a common address register R, and the insns see different
definitions of R (i.e. it was modified in between).

This is a backport (but not a straight cherry pick) of
r15-4518-gc0e54ce1999ccf2241f74c5188b11b92e5aedc1f.

gcc/ChangeLog:

PR rtl-optimization/116783
* config/aarch64/aarch64-ldp-fusion.cc
(def_walker::cand_addr_uses): New.
(def_walker::def_walker): Add parameter for candidate address
uses.
(def_walker::alias_conflict_p): Declare.
(def_walker::addr_reg_conflict_p): New.
(def_walker::conflict_p): New.
(store_walker::store_walker): Add parameter for candidate
address uses and pass to base ctor.
(store_walker::conflict_p): Rename to ...
(store_walker::alias_conflict_p): ... this.
(load_walker::load_walker): Add parameter for candidate
address uses and pass to base ctor.
(load_walker::conflict_p): Rename to ...
(load_walker::alias_conflict_p): ... this.
(ldp_bb_info::try_fuse_pair): Collect address register
uses for candidate insns and pass down to alias walkers.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116783
* g++.dg/torture/pr116783.C: New test.

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 127 ---
 gcc/testsuite/g++.dg/torture/pr116783.C  |  98 
 2 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 1fc25e389cfe..f32d30d54c5c 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -2173,11 +2173,80 @@ protected:
 
   def_iter_t def_iter;
   insn_info *limit;
-  def_walker (def_info *def, insn_info *limit) :
-def_iter (def), limit (limit) {}
+
+  // Array of register uses from the candidate insn which occur in MEMs.
+  use_array cand_addr_uses;
+
+  def_walker (def_info *def, insn_info *limit, use_array addr_uses) :
+def_iter (def), limit (limit), cand_addr_uses (addr_uses) {}
 
   virtual bool iter_valid () const { return *def_iter; }
 
+  // Implemented in {load,store}_walker.
+  virtual bool alias_conflict_p (int &budget) const = 0;
+
+  // Return true if the current (walking) INSN () uses a register R inside a
+  // MEM, where R is also used inside a MEM by the (static) candidate insn, and
+  // those uses see different definitions of that register.  In this case we
+  // can't rely on RTL alias analysis, and for now we conservatively assume 
that
+  // there is an alias conflict.  See PR116783.
+  bool addr_reg_conflict_p () const
+  {
+use_array curr_insn_uses = insn ()->uses ();
+auto cand_use_iter = cand_addr_uses.begin ();
+auto insn_use_iter = curr_insn_uses.begin ();
+while (cand_use_iter != cand_addr_uses.end ()
+  && insn_use_iter != curr_insn_uses.end ())
+  {
+   auto insn_use = *insn_use_iter;
+   auto cand_use = *cand_use_iter;
+   if (insn_use->regno () > cand_use->regno ())
+ cand_use_iter++;
+   else if (insn_use->regno () < cand_use->regno ())
+ insn_use_iter++;
+   else
+ {
+   // As it stands I believe the alias code (memory_modified_in_insn_p)
+   // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should
+   // be safe to skip over uses that only occur in notes.
+   if (insn_use->includes_address_uses ()
+   && !insn_use->only_occurs_in_notes ()
+   && insn_use->def () != cand_use->def ())
+ {
+   if (dump_file)
+ {
+   fprintf (dump_file,
+"assuming aliasing of cand i%d and i%d:\n"
+"-> insns see different defs of common addr reg 
r%u\n"
+"-> ",
+cand_use->insn ()->uid (), insn_use->insn ()->uid 
(),
+insn_use->regno ());
+
+   // Note that while the following sequence could be made more
+   // concise by eliding pp_string calls into 

[gcc r15-4518] pair-fusion: Assume alias conflict if common address reg changes [PR116783]

2024-10-21 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:c0e54ce1999ccf2241f74c5188b11b92e5aedc1f

commit r15-4518-gc0e54ce1999ccf2241f74c5188b11b92e5aedc1f
Author: Alex Coplan 
Date:   Fri Sep 20 17:39:39 2024 +0100

pair-fusion: Assume alias conflict if common address reg changes [PR116783]

As the PR shows, pair-fusion was tricking memory_modified_in_insn_p into
returning false when a common base register (in this case, x1) was
modified between the mem and the store insn.  This lead to wrong code as
the accesses really did alias.

To avoid this sort of problem, this patch avoids invoking RTL alias
analysis altogether (and assume an alias conflict) if the two insns to
be compared share a common address register R, and the insns see different
definitions of R (i.e. it was modified in between).

gcc/ChangeLog:

PR rtl-optimization/116783
* pair-fusion.cc (def_walker::cand_addr_uses): New.
(def_walker::def_walker): Add parameter for candidate address
uses.
(def_walker::alias_conflict_p): Declare.
(def_walker::addr_reg_conflict_p): New.
(def_walker::conflict_p): New.
(store_walker::store_walker): Add parameter for candidate
address uses and pass to base ctor.
(store_walker::conflict_p): Rename to ...
(store_walker::alias_conflict_p): ... this.
(load_walker::load_walker): Add parameter for candidate
address uses and pass to base ctor.
(load_walker::conflict_p): Rename to ...
(load_walker::alias_conflict_p): ... this.
(pair_fusion_bb_info::try_fuse_pair): Collect address register
uses for candidate insns and pass down to alias walkers.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116783
* g++.dg/torture/pr116783.C: New test.

Diff:
---
 gcc/pair-fusion.cc  | 127 +---
 gcc/testsuite/g++.dg/torture/pr116783.C |  98 
 2 files changed, 213 insertions(+), 12 deletions(-)

diff --git a/gcc/pair-fusion.cc b/gcc/pair-fusion.cc
index 653055fdcf67..ccbb5511e9d1 100644
--- a/gcc/pair-fusion.cc
+++ b/gcc/pair-fusion.cc
@@ -2089,11 +2089,80 @@ protected:
 
   def_iter_t def_iter;
   insn_info *limit;
-  def_walker (def_info *def, insn_info *limit) :
-def_iter (def), limit (limit) {}
+
+  // Array of register uses from the candidate insn which occur in MEMs.
+  use_array cand_addr_uses;
+
+  def_walker (def_info *def, insn_info *limit, use_array addr_uses) :
+def_iter (def), limit (limit), cand_addr_uses (addr_uses) {}
 
   virtual bool iter_valid () const { return *def_iter; }
 
+  // Implemented in {load,store}_walker.
+  virtual bool alias_conflict_p (int &budget) const = 0;
+
+  // Return true if the current (walking) INSN () uses a register R inside a
+  // MEM, where R is also used inside a MEM by the (static) candidate insn, and
+  // those uses see different definitions of that register.  In this case we
+  // can't rely on RTL alias analysis, and for now we conservatively assume 
that
+  // there is an alias conflict.  See PR116783.
+  bool addr_reg_conflict_p () const
+  {
+use_array curr_insn_uses = insn ()->uses ();
+auto cand_use_iter = cand_addr_uses.begin ();
+auto insn_use_iter = curr_insn_uses.begin ();
+while (cand_use_iter != cand_addr_uses.end ()
+  && insn_use_iter != curr_insn_uses.end ())
+  {
+   auto insn_use = *insn_use_iter;
+   auto cand_use = *cand_use_iter;
+   if (insn_use->regno () > cand_use->regno ())
+ cand_use_iter++;
+   else if (insn_use->regno () < cand_use->regno ())
+ insn_use_iter++;
+   else
+ {
+   // As it stands I believe the alias code (memory_modified_in_insn_p)
+   // doesn't look at insn notes such as REG_EQU{IV,AL}, so it should
+   // be safe to skip over uses that only occur in notes.
+   if (insn_use->includes_address_uses ()
+   && !insn_use->only_occurs_in_notes ()
+   && insn_use->def () != cand_use->def ())
+ {
+   if (dump_file)
+ {
+   fprintf (dump_file,
+"assuming aliasing of cand i%d and i%d:\n"
+"-> insns see different defs of common addr reg 
r%u\n"
+"-> ",
+cand_use->insn ()->uid (), insn_use->insn ()->uid 
(),
+insn_use->regno ());
+
+   // Note that while the following sequence could be made more
+   // concise by eliding pp_string calls into the pp_printf
+   // calls, doing so triggers -Wformat-diag.
+   pretty_printer pp;
+   pp_string (&pp, "[");
+   pp_access (&pp, cand_use, 0);
+   pp_st

[gcc r15-4465] MAINTAINERS: Add myself as pair fusion and aarch64 ldp/stp maintainer

2024-10-18 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:261d803c40c9fd28c59d8d1771051663f738a871

commit r15-4465-g261d803c40c9fd28c59d8d1771051663f738a871
Author: Alex Coplan 
Date:   Fri Oct 18 11:02:15 2024 +0100

MAINTAINERS: Add myself as pair fusion and aarch64 ldp/stp maintainer

ChangeLog:

* MAINTAINERS (CPU Port Maintainers): Add myself as aarch64 ldp/stp
maintainer.
(Various Maintainers): Add myself as pair fusion maintainer.

Diff:
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 269ac2ea6b49..1074886f4419 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -54,6 +54,7 @@ docs, and the testsuite related to that.
 
 CPU Port Maintainers(CPU alphabetical order)
 
+aarch64 ldp/stp Alex Coplan 
 aarch64 portRichard Earnshaw
 aarch64 portRichard Sandiford   
 aarch64 portMarcus Shawcroft
@@ -251,6 +252,7 @@ AutoFDO Eugene Rozenfeld

 reload  Ulrich Weigand  
 RTL optimizers  Eric Botcazou   
 instruction combinerSegher Boessenkool  
+pair fusion Alex Coplan 
 auto-vectorizer Richard Biener  
 auto-vectorizer Zdenek Dvorak   
 loop infrastructure Zdenek Dvorak   


[gcc r15-4106] testsuite: Prevent unrolling of main in LTO test [PR116683]

2024-10-07 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:7faadb1f261c6b8ef988c400c39ec7df09839dbe

commit r15-4106-g7faadb1f261c6b8ef988c400c39ec7df09839dbe
Author: Alex Coplan 
Date:   Thu Sep 26 16:36:48 2024 +0100

testsuite: Prevent unrolling of main in LTO test [PR116683]

In r15-3585-g9759f6299d9633cabac540e5c893341c708093ac I added a test which
started failing on PowerPC.  The test checks that we unroll exactly one loop
three times with the following:

// { dg-final { scan-ltrans-rtl-dump-times "Unrolled loop 3 times" 1 
"loop2_unroll" } }

which passes on most targets.  However, on PowerPC, the loop in main
gets unrolled too, causing the scan-ltrans-rtl-dump-times check to fail
as the statement now appears twice in the dump.  I think the extra
unrolling is due to different unrolling heuristics in the rs6000 port.

This patch therefore explicitly tries to block the unrolling in main with an
appropriate #pragma.

gcc/testsuite/ChangeLog:

PR testsuite/116683
* g++.dg/ext/pragma-unroll-lambda-lto.C (main): Add #pragma to
prevent unrolling of the setup loop.

Diff:
---
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
index ddf11730e338..0db57c8d3a01 100644
--- a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -25,6 +25,7 @@ short *use_find(short *p)
 int main(void)
 {
   short a[1024];
+#pragma GCC unroll 0
   for (int i = 0; i < 1024; i++)
 a[i] = rand ();


[gcc r15-3585] lto: Stream has_unroll flag during LTO [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:9759f6299d9633cabac540e5c893341c708093ac

commit r15-3585-g9759f6299d9633cabac540e5c893341c708093ac
Author: Alex Coplan 
Date:   Sat Aug 3 17:02:36 2024 +

lto: Stream has_unroll flag during LTO [PR116140]

When #pragma GCC unroll is processed in
tree-cfg.cc:replace_loop_annotate_in_block, we set both the loop->unroll
field (which is currently streamed out and back in during LTO) but also
the cfun->has_unroll flag.

cfun->has_unroll, however, is not currently streamed during LTO.  This
patch fixes that.

Prior to this patch, loops marked with #pragma GCC unroll that would be
unrolled by RTL loop2_unroll in a non-LTO compilation didn't get
unrolled under LTO.

gcc/ChangeLog:

PR libstdc++/116140
* lto-streamer-in.cc (input_struct_function_base): Stream in
fn->has_unroll.
* lto-streamer-out.cc (output_struct_function_base): Stream out
fn->has_unroll.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* g++.dg/ext/pragma-unroll-lambda-lto.C: New test.

Diff:
---
 gcc/lto-streamer-in.cc |  1 +
 gcc/lto-streamer-out.cc|  1 +
 .../g++.dg/ext/pragma-unroll-lambda-lto.C  | 32 ++
 3 files changed, 34 insertions(+)

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 64f758073280..9d0ec5d589c4 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1326,6 +1326,7 @@ input_struct_function_base (struct function *fn, class 
data_in *data_in,
   fn->has_force_vectorize_loops = bp_unpack_value (&bp, 1);
   fn->has_simduid_loops = bp_unpack_value (&bp, 1);
   fn->has_musttail = bp_unpack_value (&bp, 1);
+  fn->has_unroll = bp_unpack_value (&bp, 1);
   fn->assume_function = bp_unpack_value (&bp, 1);
   fn->va_list_fpr_size = bp_unpack_value (&bp, 8);
   fn->va_list_gpr_size = bp_unpack_value (&bp, 8);
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index a4b171358d41..807b935537be 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -2283,6 +2283,7 @@ output_struct_function_base (struct output_block *ob, 
struct function *fn)
   bp_pack_value (&bp, fn->has_force_vectorize_loops, 1);
   bp_pack_value (&bp, fn->has_simduid_loops, 1);
   bp_pack_value (&bp, fn->has_musttail, 1);
+  bp_pack_value (&bp, fn->has_unroll, 1);
   bp_pack_value (&bp, fn->assume_function, 1);
   bp_pack_value (&bp, fn->va_list_fpr_size, 8);
   bp_pack_value (&bp, fn->va_list_gpr_size, 8);
diff --git a/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C 
b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
new file mode 100644
index ..144c4c326924
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/pragma-unroll-lambda-lto.C
@@ -0,0 +1,32 @@
+// { dg-do link { target c++11 } }
+// { dg-options "-O2 -flto -fdump-rtl-loop2_unroll" }
+
+#include 
+
+template
+inline Iter
+my_find(Iter first, Iter last, Pred pred)
+{
+#pragma GCC unroll 4
+while (first != last && !pred(*first))
+++first;
+return first;
+}
+
+__attribute__((noipa))
+short *use_find(short *p)
+{
+auto pred = [](short x) { return x == 42; };
+return my_find(p, p + 1024, pred);
+}
+
+int main(void)
+{
+  short a[1024];
+  for (int i = 0; i < 1024; i++)
+a[i] = rand ();
+
+  return use_find (a) - a;
+}
+
+// { dg-final { scan-ltrans-rtl-dump-times "Unrolled loop 3 times" 1 
"loop2_unroll" } }


[gcc r15-3584] testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:31ff173c70847bba94613eac5b1ef2c0bec842e6

commit r15-3584-g31ff173c70847bba94613eac5b1ef2c0bec842e6
Author: Alex Coplan 
Date:   Thu Aug 8 13:15:39 2024 +

testsuite: Ensure ltrans dump files get cleaned up properly [PR116140]

I noticed while working on a test that uses LTO and requests a dump
file, that we are failing to cleanup ltrans dump files in the testsuite.

E.g. the test I was working on compiles with -flto
-fdump-rtl-loop2_unroll, and we end up with the following file:

./gcc/testsuite/g++/pr116140.ltrans0.ltrans.287r.loop2_unroll

being left behind by the testsuite.  This is problematic not just from a
"missing cleanup" POV, but also because it can cause the test to pass
spuriously when the test is re-run wtih an unpatched compiler (without
the bug fix).  In the broken case, loop2_unroll isn't run at all, so we
end up scanning the old dumpfile (from the previous test run) and making
the dumpfile scan pass.

Running with `-v -v` in RUNTESTFLAGS we can see the following cleanup
attempt is made:

remove-build-file 
`pr116140.{C,exe}.{ltrans[0-9]*.,}[0-9][0-9][0-9]{l,i,r,t}.*'

looking again at the ltrans dump file above we can see this will fail for 
two
reasons:

 - The actual dump file has no {C,exe} extension between the basename and
   ltrans0.
 - The actual dump file has an additional `.ltrans` component after 
`.ltrans0`.

This patch therefore relaxes the pattern constructed for cleaning up such
dumpfiles to also match dumpfiles with the above form.

Running the testsuite before/after this patch shows the number of files in
gcc/testsuite (in the build dir) with "ltrans" in the name goes from 1416 
to 62
on aarch64.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* lib/gcc-dg.exp (schedule-cleanups): Relax ltrans dumpfile
cleanup pattern to handle missing cases.

Diff:
---
 gcc/testsuite/lib/gcc-dg.exp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
index d9513e2859ce..cb401a704359 100644
--- a/gcc/testsuite/lib/gcc-dg.exp
+++ b/gcc/testsuite/lib/gcc-dg.exp
@@ -190,7 +190,7 @@ proc schedule-cleanups { opts } {
 # Handle ltrans files around -flto
 if [regexp -- {(^|\s+)-flto(\s+|$)} $opts] {
verbose "Cleanup -flto seen" 4
-   set ltrans "{ltrans\[0-9\]*.,}"
+   set ltrans "{ltrans\[0-9\]*{.ltrans,}.,}"
 } else {
set ltrans ""
 }
@@ -206,7 +206,7 @@ proc schedule-cleanups { opts } {
if {$basename_ext != ""} {
regsub -- {^.*\.} $basename_ext {} basename_ext
}
-   lappend tfiles "$stem.{$basename_ext,exe}"
+   lappend tfiles "$stem{.$basename_ext,.exe,}"
unset basename_ext
} else {
lappend tfiles $basename


[gcc r15-3583] c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions [PR116140]

2024-09-11 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:f97d86242b86e4ad2bef3623c97e91481840a210

commit r15-3583-gf97d86242b86e4ad2bef3623c97e91481840a210
Author: Alex Coplan 
Date:   Fri Aug 2 09:52:50 2024 +0100

c++: Ensure ANNOTATE_EXPRs remain outermost expressions in conditions 
[PR116140]

For the testcase added with this patch, we would end up losing the:

  #pragma GCC unroll 4

and emitting "warning: ignoring loop annotation".  That warning comes
from tree-cfg.cc:replace_loop_annotate, and means that we failed to
process the ANNOTATE_EXPR in tree-cfg.cc:replace_loop_annotate_in_block.
That function walks backwards over the GIMPLE in an exiting BB for a
loop, skipping over the final gcond, and looks for any ANNOTATE_EXPRS
immediately preceding the gcond.

The function documents the following pre-condition:

   /* [...] We assume that the annotations come immediately before the
  condition in BB, if any.  */

now looking at the exiting BB of the loop, we have:

   :
  D.4524 = .ANNOTATE (iftmp.1, 1, 4);
  retval.0 = D.4524;
  if (retval.0 != 0)
goto ; [INV]
  else
goto ; [INV]

and crucially there is an intervening assignment between the gcond and
the preceding .ANNOTATE ifn call.  To see where this comes from, we can
look to the IR given by -fdump-tree-original:

  if (<::operator() (&pred, *first), unroll 4>>>)
goto ;
  else
goto ;

here the problem is that we've wrapped a CLEANUP_POINT_EXPR around the
ANNOTATE_EXPR, meaning the ANNOTATE_EXPR is no longer the outermost
expression in the condition.

The CLEANUP_POINT_EXPR gets added by the following call chain:

finish_while_stmt_cond
 -> maybe_convert_cond
 -> condition_conversion
 -> fold_build_cleanup_point_expr

this patch chooses to fix the issue by first introducing a new helper
class (annotate_saver) to save and restore outer chains of
ANNOTATE_EXPRs and then using it in maybe_convert_cond.

With this patch, we don't get any such warning and the loop gets unrolled as
expected at -O2.

gcc/cp/ChangeLog:

PR libstdc++/116140
* semantics.cc (anotate_saver): New. Use it ...
(maybe_convert_cond): ... here, to ensure any ANNOTATE_EXPRs
remain the outermost expression(s) of the condition.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* g++.dg/ext/pragma-unroll-lambda.C: New test.

Diff:
---
 gcc/cp/semantics.cc | 88 -
 gcc/testsuite/g++.dg/ext/pragma-unroll-lambda.C | 17 +
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 3e117c216da5..63212afafb3b 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -951,6 +951,86 @@ maybe_warn_unparenthesized_assignment (tree t, bool 
nested_p,
 }
 }
 
+/* Helper class for saving/restoring ANNOTATE_EXPRs.  For a tree node t, users
+   can construct one of these like so:
+
+ annotate_saver s (&t);
+
+   and t will be updated to have any annotations removed.  The user can then
+   transform t, and later restore the ANNOTATE_EXPRs with:
+
+ t = s.restore (t).
+
+   The intent is to ensure that any ANNOTATE_EXPRs remain the outermost
+   expressions following any operations on t.  */
+
+class annotate_saver {
+  /* The chain of saved annotations, if there were any.  Otherwise null.  */
+  tree m_annotations;
+
+  /* If M_ANNOTATIONS is non-null, then M_INNER points to TREE_OPERAND (A, 0)
+ for the innermost annotation A.  */
+  tree *m_inner;
+
+public:
+  annotate_saver (tree *);
+  tree restore (tree);
+};
+
+/* If *COND is an ANNOTATE_EXPR, walk through the chain of annotations, and set
+   *COND equal to the first non-ANNOTATE_EXPR (saving a pointer to the
+   original chain of annotations for later use in restore).  */
+
+annotate_saver::annotate_saver (tree *cond) : m_annotations (nullptr)
+{
+  tree *t = cond;
+  while (TREE_CODE (*t) == ANNOTATE_EXPR)
+t = &TREE_OPERAND (*t, 0);
+
+  if (t != cond)
+{
+  m_annotations = *cond;
+  *cond = *t;
+  m_inner = t;
+}
+}
+
+/* If we didn't strip any annotations on construction, return NEW_INNER
+   unmodified.  Otherwise, wrap the saved annotations around NEW_INNER 
(updating
+   the types and flags of the annotations if needed) and return the resulting
+   expression.  */
+
+tree
+annotate_saver::restore (tree new_inner)
+{
+  if (!m_annotations)
+return new_inner;
+
+  /* If the type of the inner expression changed, we need to update the types
+ of all the ANNOTATE_EXPRs.  We may need to update the flags too, but we
+ assume they only change if the type of the inner expression changes.
+ The flag update logic assumes that the other operands to the
+ ANNOTATE_EXPRs are always INTEGER_CS

[gcc r15-3378] testsuite: Rename scanltranstree.exp -> scanltrans.exp

2024-09-02 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:e4d3e7f9add34216f4baffd3124bcb22a82c39bf

commit r15-3378-ge4d3e7f9add34216f4baffd3124bcb22a82c39bf
Author: Alex Coplan 
Date:   Wed Aug 28 14:53:11 2024 +0100

testsuite: Rename scanltranstree.exp -> scanltrans.exp

Since r15-3254-g3f51f0dc88ec21c1ec79df694200f10ef85915f4
added scan-ltrans-rtl* variants to scanltranstree.exp, it no longer
makes sense to have "tree" in the name.  This renames the file
accordingly and updates users.

libatomic/ChangeLog:

* testsuite/lib/libatomic.exp: Load scanltrans.exp instead of
scanltranstree.exp.

libgomp/ChangeLog:

* testsuite/lib/libgomp.exp: Load scanltrans.exp instead of
scanltranstree.exp.

libitm/ChangeLog:

* testsuite/lib/libitm.exp: Load scanltrans.exp instead of
scanltranstree.exp.

libphobos/ChangeLog:

* testsuite/lib/libphobos-dg.exp: Load scanltrans.exp instead of
scanltranstree.exp.

libvtv/ChangeLog:

* testsuite/lib/libvtv.exp: Load scanltrans.exp instead of
scanltranstree.exp.

gcc/testsuite/ChangeLog:

* gcc.dg-selftests/dg-final.exp: Load scanltrans.exp instead of
scanltranstree.exp.
* lib/gcc-dg.exp: Likewise.
* lib/scanltranstree.exp: Rename to ...
* lib/scanltrans.exp: ... this.

Diff:
---
 gcc/testsuite/gcc.dg-selftests/dg-final.exp  | 2 +-
 gcc/testsuite/lib/gcc-dg.exp | 2 +-
 gcc/testsuite/lib/{scanltranstree.exp => scanltrans.exp} | 0
 libatomic/testsuite/lib/libatomic.exp| 2 +-
 libgomp/testsuite/lib/libgomp.exp| 2 +-
 libitm/testsuite/lib/libitm.exp  | 2 +-
 libphobos/testsuite/lib/libphobos-dg.exp | 2 +-
 libvtv/testsuite/lib/libvtv.exp  | 2 +-
 8 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/testsuite/gcc.dg-selftests/dg-final.exp 
b/gcc/testsuite/gcc.dg-selftests/dg-final.exp
index 6b6f32e0510c..5503b0c09114 100644
--- a/gcc/testsuite/gcc.dg-selftests/dg-final.exp
+++ b/gcc/testsuite/gcc.dg-selftests/dg-final.exp
@@ -23,7 +23,7 @@ load_lib "scanlang.exp"
 load_lib "lto.exp"
 load_lib "scanasm.exp"
 load_lib "scanwpaipa.exp"
-load_lib "scanltranstree.exp"
+load_lib "scanltrans.exp"
 load_lib "scanoffloadtree.exp"
 load_lib "scanoffloadrtl.exp"
 load_lib "gcc-dg.exp"
diff --git a/gcc/testsuite/lib/gcc-dg.exp b/gcc/testsuite/lib/gcc-dg.exp
index 992062103c12..d9513e2859ce 100644
--- a/gcc/testsuite/lib/gcc-dg.exp
+++ b/gcc/testsuite/lib/gcc-dg.exp
@@ -21,7 +21,7 @@ load_lib target-supports-dg.exp
 load_lib scanasm.exp
 load_lib scanrtl.exp
 load_lib scantree.exp
-load_lib scanltranstree.exp
+load_lib scanltrans.exp
 load_lib scanipa.exp
 load_lib scanwpaipa.exp
 load_lib scanlang.exp
diff --git a/gcc/testsuite/lib/scanltranstree.exp 
b/gcc/testsuite/lib/scanltrans.exp
similarity index 100%
rename from gcc/testsuite/lib/scanltranstree.exp
rename to gcc/testsuite/lib/scanltrans.exp
diff --git a/libatomic/testsuite/lib/libatomic.exp 
b/libatomic/testsuite/lib/libatomic.exp
index ed6ba806732f..642530557f78 100644
--- a/libatomic/testsuite/lib/libatomic.exp
+++ b/libatomic/testsuite/lib/libatomic.exp
@@ -38,7 +38,7 @@ load_gcc_lib scanlang.exp
 load_gcc_lib scanrtl.exp
 load_gcc_lib scansarif.exp
 load_gcc_lib scantree.exp
-load_gcc_lib scanltranstree.exp
+load_gcc_lib scanltrans.exp
 load_gcc_lib scanipa.exp
 load_gcc_lib scanwpaipa.exp
 load_gcc_lib multiline.exp
diff --git a/libgomp/testsuite/lib/libgomp.exp 
b/libgomp/testsuite/lib/libgomp.exp
index 7c1092629168..2d0339b5e565 100644
--- a/libgomp/testsuite/lib/libgomp.exp
+++ b/libgomp/testsuite/lib/libgomp.exp
@@ -31,7 +31,7 @@ load_gcc_lib scanlang.exp
 load_gcc_lib scanrtl.exp
 load_gcc_lib scansarif.exp
 load_gcc_lib scantree.exp
-load_gcc_lib scanltranstree.exp
+load_gcc_lib scanltrans.exp
 load_gcc_lib scanoffload.exp
 load_gcc_lib scanoffloadipa.exp
 load_gcc_lib scanoffloadtree.exp
diff --git a/libitm/testsuite/lib/libitm.exp b/libitm/testsuite/lib/libitm.exp
index 3e60797c3e31..0182234a24ab 100644
--- a/libitm/testsuite/lib/libitm.exp
+++ b/libitm/testsuite/lib/libitm.exp
@@ -44,7 +44,7 @@ load_gcc_lib scanlang.exp
 load_gcc_lib scanrtl.exp
 load_gcc_lib scansarif.exp
 load_gcc_lib scantree.exp
-load_gcc_lib scanltranstree.exp
+load_gcc_lib scanltrans.exp
 load_gcc_lib scanipa.exp
 load_gcc_lib scanwpaipa.exp
 load_gcc_lib timeout-dg.exp
diff --git a/libphobos/testsuite/lib/libphobos-dg.exp 
b/libphobos/testsuite/lib/libphobos-dg.exp
index 965ff025a04d..90bc02ef5e5e 100644
--- a/libphobos/testsuite/lib/libphobos-dg.exp
+++ b/libphobos/testsuite/lib/libphobos-dg.exp
@@ -17,7 +17,7 @@
 load_gcc_lib multiline.exp
 load_gcc_lib prune.exp
 load_gcc_lib scandump.exp
-load_gcc_lib scanltranstree.exp
+load_gcc_lib scanltrans.exp
 lo

[gcc r15-3322] gdbhooks: Fix printing of vec with vl_ptr layout

2024-08-30 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:5020f8ea80af90d8a08eff9fdef3276056df98f5

commit r15-3322-g5020f8ea80af90d8a08eff9fdef3276056df98f5
Author: Alex Coplan 
Date:   Fri Aug 30 15:29:34 2024 +0100

gdbhooks: Fix printing of vec with vl_ptr layout

As it stands, the pretty printing of GCC's vecs by gdbhooks.py only
handles vectors with vl_embed layout.  As such, when encountering a vec
with vl_ptr layout, GDB would print a diagnostic like:

  gdb.error: There is no member or method named m_vecpfx.

when (e.g.) any such vec occurred in a backtrace.  This patch extends
VecPrinter.children to also handle vl_ptr vectors.

gcc/ChangeLog:

* gdbhooks.py (VEC_KIND_EMBED): New.
(VEC_KIND_PTR): New.
(get_vec_kind): New.
(VecPrinter.children): Also handle vectors with vl_ptr layout.

Diff:
---
 gcc/gdbhooks.py | 30 +++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py
index 7a64c03b8acb..904ee28423a9 100644
--- a/gcc/gdbhooks.py
+++ b/gcc/gdbhooks.py
@@ -453,6 +453,25 @@ class PassPrinter:
 
 ##
 
+VEC_KIND_EMBED = 0
+VEC_KIND_PTR = 1
+
+"""
+Given a vec or pointer to vec, return its layout (embedded or space
+efficient).
+"""
+def get_vec_kind(val):
+typ = val.type
+if typ.code == gdb.TYPE_CODE_PTR:
+typ = typ.target()
+kind = typ.template_argument(2).name
+if kind == "vl_embed":
+return VEC_KIND_EMBED
+elif kind == "vl_ptr":
+return VEC_KIND_PTR
+else:
+assert False, f"unexpected vec kind {kind}"
+
 class VecPrinter:
 #-ex "up" -ex "p bb->preds"
 def __init__(self, gdbval):
@@ -467,11 +486,16 @@ class VecPrinter:
 return '0x%x' % intptr(self.gdbval)
 
 def children (self):
-if intptr(self.gdbval) == 0:
+val = self.gdbval
+if intptr(val) != 0 and get_vec_kind(val) == VEC_KIND_PTR:
+val = val['m_vec']
+
+if intptr(val) == 0:
 return
-m_vecpfx = self.gdbval['m_vecpfx']
+
+assert get_vec_kind(val) == VEC_KIND_EMBED
+m_vecpfx = val['m_vecpfx']
 m_num = m_vecpfx['m_num']
-val = self.gdbval
 typ = val.type
 if typ.code == gdb.TYPE_CODE_PTR:
 typ = typ.target()


[gcc r15-3283] testsuite: Fix up refactored scanltranstree.exp functions [PR116522]

2024-08-29 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:4b729d2ff3259e5b1d40f93d4f9e7edf5f0064f4

commit r15-3283-g4b729d2ff3259e5b1d40f93d4f9e7edf5f0064f4
Author: Alex Coplan 
Date:   Thu Aug 29 11:31:40 2024 +0100

testsuite: Fix up refactored scanltranstree.exp functions [PR116522]

When adding RTL variants of the scan-ltrans-tree* functions in:
r15-3254-g3f51f0dc88ec21c1ec79df694200f10ef85915f4
I messed up the name of the underlying scan function to invoke.  The
code currently attempts to invoke functions named
scan{,-not,-dem,-dem-not} but should instead be invoking
scan-dump{,-not,-dem,-dem-not}.  This patch fixes that.

gcc/testsuite/ChangeLog:

PR testsuite/116522
* lib/scanltranstree.exp: Fix name of underlying scan function
used for scan-ltrans-{tree,rtl}-dump{,-not,-dem,-dem-not}.

Diff:
---
 gcc/testsuite/lib/scanltranstree.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/lib/scanltranstree.exp 
b/gcc/testsuite/lib/scanltranstree.exp
index a7d4de3765fc..3d85813ea2f9 100644
--- a/gcc/testsuite/lib/scanltranstree.exp
+++ b/gcc/testsuite/lib/scanltranstree.exp
@@ -24,7 +24,7 @@ load_lib scandump.exp
 foreach ir { tree rtl } {
 foreach modifier { {} -not -dem -dem-not } {
eval [string map [list @NAME@ scan-ltrans-$ir-dump$modifier \
-  @SCAN@ scan$modifier \
+  @SCAN@ scan-dump$modifier \
   @TYPE@ ltrans-$ir \
   @SUFFIX@ [string index $ir 0]] {
proc @NAME@ { args } {


[gcc r15-3254] testsuite: Add scan-ltrans-rtl* for use in dg-final [PR116140]

2024-08-28 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:3f51f0dc88ec21c1ec79df694200f10ef85915f4

commit r15-3254-g3f51f0dc88ec21c1ec79df694200f10ef85915f4
Author: Alex Coplan 
Date:   Tue Aug 27 16:51:12 2024 +

testsuite: Add scan-ltrans-rtl* for use in dg-final [PR116140]

This extends the scan-ltrans-tree* helpers to create RTL variants.  This
is needed to check the behaviour of an RTL pass under LTO.

gcc/ChangeLog:

PR libstdc++/116140
* doc/sourcebuild.texi: Document ltrans-rtl value of kind for
scan--dump*.

gcc/testsuite/ChangeLog:

PR libstdc++/116140
* lib/scanltranstree.exp (scan-ltrans-rtl-dump): New.
(scan-ltrans-rtl-dump-not): New.
(scan-ltrans-rtl-dump-dem): New.
(scan-ltrans-rtl-dump-dem-not): New.
(scan-ltrans-rtl-dump-times): New.

Diff:
---
 gcc/doc/sourcebuild.texi |  4 +-
 gcc/testsuite/lib/scanltranstree.exp | 80 +---
 2 files changed, 39 insertions(+), 45 deletions(-)

diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 3c55f1037953..0636fc0567c5 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -3627,8 +3627,8 @@ stands for zero or more unmatched lines; the whitespace 
after
 @subsubsection Scan optimization dump files
 
 These commands are available for @var{kind} of @code{tree}, @code{ltrans-tree},
-@code{offload-tree}, @code{rtl}, @code{offload-rtl}, @code{ipa},
-@code{offload-ipa}, and @code{wpa-ipa}.
+@code{offload-tree}, @code{rtl}, @code{ltrans-rtl}, @code{offload-rtl},
+@code{ipa}, @code{offload-ipa}, and @code{wpa-ipa}.
 
 @table @code
 @item scan-@var{kind}-dump @var{regex} @var{suffix} [@{ target/xfail 
@var{selector} @}]
diff --git a/gcc/testsuite/lib/scanltranstree.exp 
b/gcc/testsuite/lib/scanltranstree.exp
index bc6e02dc3696..a7d4de3765fc 100644
--- a/gcc/testsuite/lib/scanltranstree.exp
+++ b/gcc/testsuite/lib/scanltranstree.exp
@@ -19,50 +19,44 @@
 
 load_lib scandump.exp
 
-# The first item in the list is an LTO equivalent of the second item
-# in the list; see the documentation of the second item for details.
-foreach { name scan type suffix } {
-scan-ltrans-tree-dump scan-dump ltrans-tree t
-scan-ltrans-tree-dump-not scan-dump-not ltrans-tree t
-scan-ltrans-tree-dump-dem scan-dump-dem ltrans-tree t
-scan-ltrans-tree-dump-dem-not scan-dump-dem-not ltrans-tree t
-} {
-eval [string map [list @NAME@ $name \
-  @SCAN@ $scan \
-  @TYPE@ $type \
-  @SUFFIX@ $suffix] {
-proc @NAME@ { args } {
-   if { [llength $args] < 2 } {
-   error "@NAME@: too few arguments"
-   return
-   }
-   if { [llength $args] > 3 } {
-   error "@NAME@: too many arguments"
-   return
+# Define scan-ltrans-{tree,rtl}-dump{,-not,-dem,-dem-not}.  These are LTO
+# variants of the corresponding functions without -ltrans in the name.
+foreach ir { tree rtl } {
+foreach modifier { {} -not -dem -dem-not } {
+   eval [string map [list @NAME@ scan-ltrans-$ir-dump$modifier \
+  @SCAN@ scan$modifier \
+  @TYPE@ ltrans-$ir \
+  @SUFFIX@ [string index $ir 0]] {
+   proc @NAME@ { args } {
+   if { [llength $args] < 2 } {
+   error "@NAME@: too few arguments"
+   return
+   }
+   if { [llength $args] > 3 } {
+   error "@NAME@: too many arguments"
+   return
+   }
+   if { [llength $args] >= 3 } {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans" \
+   [lindex $args 2]
+   } else {
+   @SCAN@ @TYPE@ [lindex $args 0] \
+   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
+   ".ltrans0.ltrans"
+   }
}
-   if { [llength $args] >= 3 } {
-   @SCAN@ @TYPE@ [lindex $args 0] \
-   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
-   ".ltrans0.ltrans" \
-   [lindex $args 2]
-   } else {
-   @SCAN@ @TYPE@ [lindex $args 0] \
-   "\[0-9\]\[0-9\]\[0-9\]@SUFFIX@.[lindex $args 1]" \
-   ".ltrans0.ltrans"
-   }
-}
-}]
+   }]
+}
 }
 
-# The first item in the list is an LTO equivalent of the second item
-# in the list; see the documentation of the second item for details.
-foreach { name scan type suffix } {
-scan-ltrans-tree-dump-times scan-dump-times ltrans-tree t
-} {
-eval [string map [list @NAME@ $name \
-  @SCAN@ $scan \
-

[gcc r15-2715] gdbhooks: Add attempt to invoke on-gcc-hooks-load

2024-08-05 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:f01df5e47b2551e0f435a9efa8e0a30142f3d46b

commit r15-2715-gf01df5e47b2551e0f435a9efa8e0a30142f3d46b
Author: Alex Coplan 
Date:   Mon Aug 5 08:45:58 2024 +0100

gdbhooks: Add attempt to invoke on-gcc-hooks-load

This extends GCC's GDB hooks to attempt invoking the user-defined
command "on-gcc-hooks-load".  The idea is that users can define the
command in their .gdbinit to override the default values of parameters
defined by GCC's GDB extensions.

For example, together with the previous patch, I can add the following
fragment to my .gdbinit:

define on-gcc-hooks-load
  set gcc-dot-cmd xdot
end

which means, once the GCC extensions get loaded, whenever I invoke
dot-fn then the graph will be rendered using xdot.

The try/except should make this patch a no-op for users that don't
currently define this command.  I looked for a way to test explicitly
for whether a GDB command exists but didn't find one.

This is needed because the user's .gdbinit is sourced before GCC's GDB
extensions are loaded, and GCC-specific parameters can't be configured
before they are defined.

gcc/ChangeLog:

* gdbhooks.py: Add attempted call to "on-gcc-hooks-load" once
we've finished loading the hooks.

Diff:
---
 gcc/gdbhooks.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py
index db8ce0d071bb..7a64c03b8acb 100644
--- a/gcc/gdbhooks.py
+++ b/gcc/gdbhooks.py
@@ -865,4 +865,12 @@ class DotFn(gdb.Command):
 
 DotFn()
 
+# Try and invoke the user-defined command "on-gcc-hooks-load".  Doing
+# this allows users to customize the GCC extensions once they've been
+# loaded by defining the hook in their .gdbinit.
+try:
+gdb.execute('on-gcc-hooks-load')
+except gdb.error:
+pass
+
 print('Successfully loaded GDB hooks for GCC')


[gcc r15-2714] gdbhooks: Make dot viewer configurable

2024-08-05 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:08cc516a8cfe553064f84a86be4c30f05a614342

commit r15-2714-g08cc516a8cfe553064f84a86be4c30f05a614342
Author: Alex Coplan 
Date:   Mon Aug 5 08:45:29 2024 +0100

gdbhooks: Make dot viewer configurable

This adds a new GDB parameter 'gcc-dot-cmd' which allows the user to
configure the command used to render the CFG within dot-fn.

E.g. with this patch the user can change their dot viewer like so:

(gdb) show gcc-dot-cmd
The current value of 'gcc-dot-cmd' is "dot -Tx11".
(gdb) set gcc-dot-cmd xdot
(gdb) dot-fn # opens in xdot

The second patch in this series adds a hook which users can define in
their .gdbinit in order to be called when the GCC extensions have
finished loading, thus allowing users to automatically configure
gcc-dot-cmd as desired in their .gdbinit.

gcc/ChangeLog:

* gdbhooks.py (GCCDotCmd): New.
(gcc_dot_cmd): New. Use it ...
(DotFn.invoke): ... here.

Diff:
---
 gcc/gdbhooks.py | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/gdbhooks.py b/gcc/gdbhooks.py
index 92e38880a70a..db8ce0d071bb 100644
--- a/gcc/gdbhooks.py
+++ b/gcc/gdbhooks.py
@@ -783,6 +783,18 @@ class DumpFn(gdb.Command):
 
 DumpFn()
 
+class GCCDotCmd(gdb.Parameter):
+"""
+This parameter controls the command used to render dot files within
+GCC's dot-fn command.  It will be invoked as gcc-dot-cmd .
+"""
+def __init__(self):
+super(GCCDotCmd, self).__init__('gcc-dot-cmd',
+gdb.COMMAND_NONE, gdb.PARAM_STRING)
+self.value = "dot -Tx11"
+
+gcc_dot_cmd = GCCDotCmd()
+
 class DotFn(gdb.Command):
 """
 A custom command to show a gimple/rtl function control flow graph.
@@ -848,7 +860,8 @@ class DotFn(gdb.Command):
 return
 
 # Show graph in temp file
-os.system("( dot -Tx11 \"%s\"; rm \"%s\" ) &" % (filename, filename))
+dot_cmd = gcc_dot_cmd.value
+os.system("( %s \"%s\"; rm \"%s\" ) &" % (dot_cmd, filename, filename))
 
 DotFn()


[gcc r15-1854] middle-end: Add debug functions to dump dominator tree in dot format

2024-07-05 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:ae07f62a70ee2d0fdd7d8786122ae6360cfd4ca9

commit r15-1854-gae07f62a70ee2d0fdd7d8786122ae6360cfd4ca9
Author: Alex Coplan 
Date:   Fri Jul 5 11:57:56 2024 +0100

middle-end: Add debug functions to dump dominator tree in dot format

This adds debug functions to dump the dominator tree in dot format.
There are two overloads: one which takes a FILE * and another which
takes a const char *fname and wraps the first with fopen/fclose for
convenience.

gcc/ChangeLog:

* dominance.cc (dot_dominance_tree): New.

Diff:
---
 gcc/dominance.cc | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/gcc/dominance.cc b/gcc/dominance.cc
index 0357210ed27..c14d997ded7 100644
--- a/gcc/dominance.cc
+++ b/gcc/dominance.cc
@@ -1658,6 +1658,36 @@ debug_dominance_info (enum cdi_direction dir)
   fprintf (stderr, "%i %i\n", bb->index, bb2->index);
 }
 
+/* Dump the dominance tree in direction DIR to the file F in dot form.
+   This allows easily visualizing the tree using graphviz.  */
+
+DEBUG_FUNCTION void
+dot_dominance_tree (FILE *f, enum cdi_direction dir)
+{
+  fprintf (f, "digraph {\n");
+  basic_block bb, idom;
+  FOR_EACH_BB_FN (bb, cfun)
+if ((idom = get_immediate_dominator (dir, bb)))
+  fprintf (f, "%i -> %i;\n", idom->index, bb->index);
+  fprintf (f, "}\n");
+}
+
+/* Convenience wrapper around the above that dumps the dominance tree in
+   direction DIR to the file at path FNAME in dot form.  */
+
+DEBUG_FUNCTION void
+dot_dominance_tree (const char *fname, enum cdi_direction dir)
+{
+  FILE *f = fopen (fname, "w");
+  if (f)
+{
+  dot_dominance_tree (f, dir);
+  fclose (f);
+}
+  else
+fprintf (stderr, "failed to open %s: %s\n", fname, xstrerror (errno));
+}
+
 /* Prints to stderr representation of the dominance tree (for direction DIR)
rooted in ROOT, indented by INDENT tabulators.  If INDENT_FIRST is false,
the first line of the output is not indented.  */


[gcc r14-10369] aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

2024-07-02 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:8eb469546f7f32dcec5d3376dfb419c1d4f21e4a

commit r14-10369-g8eb469546f7f32dcec5d3376dfb419c1d4f21e4a
Author: Alex Coplan 
Date:   Fri May 3 14:12:32 2024 +

aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

This fixes a typo in combine_reg_notes in the load/store pair fusion
pass.  As it stands, the calls to filter_notes store any
REG_FRAME_RELATED_EXPR to fr_expr with the following association:

 - i2 -> fr_expr[0]
 - i1 -> fr_expr[1]

but then the checks inside the following if statement expect the
opposite (more natural) association, i.e.:

 - i2 -> fr_expr[1]
 - i1 -> fr_expr[0]

this patch fixes the oversight by swapping the fr_expr indices in the
calls to filter_notes.

In hindsight it would probably have been less confusing / error-prone to
have combine_reg_notes take an array of two insns, then we wouldn't have
to mix 1-based and 0-based indexing as well as remembering to call
filter_notes in reverse program order.  This however is a minimal fix
for backporting purposes.

gcc/ChangeLog:

PR target/114936
* config/aarch64/aarch64-ldp-fusion.cc (combine_reg_notes):
Ensure insn iN has its REG_FRAME_RELATED_EXPR (if any) stored in
FR_EXPR[N-1], thus matching the correspondence expected by the
copy_rtx calls.

(cherry picked from commit 73c8e24b692e691c665d0f1f5424432837bd8c06)

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 365dcf48b22..1fc25e389cf 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -1093,9 +1093,9 @@ combine_reg_notes (insn_info *i1, insn_info *i2, bool 
load_p)
   bool found_eh_region = false;
   rtx result = NULL_RTX;
   result = filter_notes (REG_NOTES (i2->rtl ()), result,
-&found_eh_region, fr_expr);
-  result = filter_notes (REG_NOTES (i1->rtl ()), result,
 &found_eh_region, fr_expr + 1);
+  result = filter_notes (REG_NOTES (i1->rtl ()), result,
+&found_eh_region, fr_expr);
 
   if (!load_p)
 {


[gcc r15-320] aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

2024-05-08 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:73c8e24b692e691c665d0f1f5424432837bd8c06

commit r15-320-g73c8e24b692e691c665d0f1f5424432837bd8c06
Author: Alex Coplan 
Date:   Fri May 3 14:12:32 2024 +

aarch64: Fix typo in aarch64-ldp-fusion.cc:combine_reg_notes [PR114936]

This fixes a typo in combine_reg_notes in the load/store pair fusion
pass.  As it stands, the calls to filter_notes store any
REG_FRAME_RELATED_EXPR to fr_expr with the following association:

 - i2 -> fr_expr[0]
 - i1 -> fr_expr[1]

but then the checks inside the following if statement expect the
opposite (more natural) association, i.e.:

 - i2 -> fr_expr[1]
 - i1 -> fr_expr[0]

this patch fixes the oversight by swapping the fr_expr indices in the
calls to filter_notes.

In hindsight it would probably have been less confusing / error-prone to
have combine_reg_notes take an array of two insns, then we wouldn't have
to mix 1-based and 0-based indexing as well as remembering to call
filter_notes in reverse program order.  This however is a minimal fix
for backporting purposes.

gcc/ChangeLog:

PR target/114936
* config/aarch64/aarch64-ldp-fusion.cc (combine_reg_notes):
Ensure insn iN has its REG_FRAME_RELATED_EXPR (if any) stored in
FR_EXPR[N-1], thus matching the correspondence expected by the
copy_rtx calls.

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 437f0aeb7877..1d9caeab05d4 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -1085,9 +1085,9 @@ combine_reg_notes (insn_info *i1, insn_info *i2, bool 
load_p)
   bool found_eh_region = false;
   rtx result = NULL_RTX;
   result = filter_notes (REG_NOTES (i2->rtl ()), result,
-&found_eh_region, fr_expr);
-  result = filter_notes (REG_NOTES (i1->rtl ()), result,
 &found_eh_region, fr_expr + 1);
+  result = filter_notes (REG_NOTES (i1->rtl ()), result,
+&found_eh_region, fr_expr);
 
   if (!load_p)
 {


[gcc r15-282] aarch64: Preserve mem info on change of base for ldp/stp [PR114674]

2024-05-07 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:74690ff96b263b8609639b7fbc5d6afd3f19cb98

commit r15-282-g74690ff96b263b8609639b7fbc5d6afd3f19cb98
Author: Alex Coplan 
Date:   Wed Apr 10 16:30:36 2024 +0100

aarch64: Preserve mem info on change of base for ldp/stp [PR114674]

The ldp/stp fusion pass can change the base of an access so that the two
accesses end up using a common base register.  So far we have been using
adjust_address_nv to do this, but this means that we don't preserve
other properties of the mem we're replacing.  It seems better to use
replace_equiv_address_nv, as this will preserve e.g. the MEM_ALIGN of the
mem whose address we're changing.

The PR shows that by adjusting the other mem we lose alignment
information about the original access and therefore end up rejecting an
otherwise viable pair when --param=aarch64-stp-policy=aligned is passed.
This patch fixes that by using replace_equiv_address_nv instead.

Notably this is the same approach as taken by
aarch64_check_consecutive_mems when a change of base is required, so
this at least makes things more consistent between the ldp fusion pass
and the peepholes.

gcc/ChangeLog:

PR target/114674
* config/aarch64/aarch64-ldp-fusion.cc (ldp_bb_info::fuse_pair):
Use replace_equiv_address_nv on a change of base instead of
adjust_address_nv on the other access.

gcc/testsuite/ChangeLog:

PR target/114674
* gcc.target/aarch64/pr114674.c: New test.

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc|  8 
 gcc/testsuite/gcc.target/aarch64/pr114674.c | 17 +
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 0bc225dae7b..437f0aeb787 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -1722,11 +1722,11 @@ ldp_bb_info::fuse_pair (bool load_p,
adjust_amt *= -1;
 
   rtx change_reg = XEXP (change_pat, !load_p);
-  machine_mode mode_for_mem = GET_MODE (change_mem);
   rtx effective_base = drop_writeback (base_mem);
-  rtx new_mem = adjust_address_nv (effective_base,
-  mode_for_mem,
-  adjust_amt);
+  rtx adjusted_addr = plus_constant (Pmode,
+XEXP (effective_base, 0),
+adjust_amt);
+  rtx new_mem = replace_equiv_address_nv (change_mem, adjusted_addr);
   rtx new_set = load_p
? gen_rtx_SET (change_reg, new_mem)
: gen_rtx_SET (new_mem, change_reg);
diff --git a/gcc/testsuite/gcc.target/aarch64/pr114674.c 
b/gcc/testsuite/gcc.target/aarch64/pr114674.c
new file mode 100644
index 000..944784fd008
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr114674.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param=aarch64-stp-policy=aligned" } */
+typedef struct {
+   unsigned int f1;
+   unsigned int f2;
+} test_struct;
+
+static test_struct ts = {
+   123, 456
+};
+
+void foo(void)
+{
+   ts.f2 = 36969 * (ts.f2 & 65535) + (ts.f1 >> 16);
+   ts.f1 = 18000 * (ts.f2 & 65535) + (ts.f2 >> 16);
+}
+/* { dg-final { scan-assembler-times "stp" 1 } } */


[gcc r14-10160] cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

2024-05-03 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:242fbc0df6c23115c47d256e66fba6a770265c5d

commit r14-10160-g242fbc0df6c23115c47d256e66fba6a770265c5d
Author: Alex Coplan 
Date:   Fri May 3 09:23:59 2024 +0100

cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

The PR shows that when cfgrtl.cc:duplicate_insn_chain attempts to
update the MR_DEPENDENCE_CLIQUE information for a MEM_EXPR we can end up
accidentally dropping (e.g.) an ARRAY_REF from the MEM_EXPR and end up
replacing it with the underlying MEM_REF.  This leads to an
inconsistency in the MEM_EXPR information, and could lead to wrong code.

While the walk down to the MEM_REF is necessary to update
MR_DEPENDENCE_CLIQUE, we should use the outer tree expression for the
MEM_EXPR.  This patch does that.

gcc/ChangeLog:

PR rtl-optimization/114924
* cfgrtl.cc (duplicate_insn_chain): When updating MEM_EXPRs,
don't strip (e.g.) ARRAY_REFs from the final MEM_EXPR.

(cherry picked from commit fe40d525619eee9c2821126390df75068df4773a)

Diff:
---
 gcc/cfgrtl.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 304c429c99b..a5dc3512159 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -4432,12 +4432,13 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
   since MEM_EXPR is shared so make a copy and
   walk to the subtree again.  */
tree new_expr = unshare_expr (MEM_EXPR (*iter));
+   tree orig_new_expr = new_expr;
if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
  new_expr = TREE_OPERAND (new_expr, 0);
while (handled_component_p (new_expr))
  new_expr = TREE_OPERAND (new_expr, 0);
MR_DEPENDENCE_CLIQUE (new_expr) = newc;
-   set_mem_expr (const_cast  (*iter), new_expr);
+   set_mem_expr (const_cast  (*iter), orig_new_expr);
  }
  }
}


[gcc r15-126] cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

2024-05-03 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:fe40d525619eee9c2821126390df75068df4773a

commit r15-126-gfe40d525619eee9c2821126390df75068df4773a
Author: Alex Coplan 
Date:   Fri May 3 09:23:59 2024 +0100

cfgrtl: Fix MEM_EXPR update in duplicate_insn_chain [PR114924]

The PR shows that when cfgrtl.cc:duplicate_insn_chain attempts to
update the MR_DEPENDENCE_CLIQUE information for a MEM_EXPR we can end up
accidentally dropping (e.g.) an ARRAY_REF from the MEM_EXPR and end up
replacing it with the underlying MEM_REF.  This leads to an
inconsistency in the MEM_EXPR information, and could lead to wrong code.

While the walk down to the MEM_REF is necessary to update
MR_DEPENDENCE_CLIQUE, we should use the outer tree expression for the
MEM_EXPR.  This patch does that.

gcc/ChangeLog:

PR rtl-optimization/114924
* cfgrtl.cc (duplicate_insn_chain): When updating MEM_EXPRs,
don't strip (e.g.) ARRAY_REFs from the final MEM_EXPR.

Diff:
---
 gcc/cfgrtl.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cfgrtl.cc b/gcc/cfgrtl.cc
index 304c429c99b..a5dc3512159 100644
--- a/gcc/cfgrtl.cc
+++ b/gcc/cfgrtl.cc
@@ -4432,12 +4432,13 @@ duplicate_insn_chain (rtx_insn *from, rtx_insn *to,
   since MEM_EXPR is shared so make a copy and
   walk to the subtree again.  */
tree new_expr = unshare_expr (MEM_EXPR (*iter));
+   tree orig_new_expr = new_expr;
if (TREE_CODE (new_expr) == WITH_SIZE_EXPR)
  new_expr = TREE_OPERAND (new_expr, 0);
while (handled_component_p (new_expr))
  new_expr = TREE_OPERAND (new_expr, 0);
MR_DEPENDENCE_CLIQUE (new_expr) = newc;
-   set_mem_expr (const_cast  (*iter), new_expr);
+   set_mem_expr (const_cast  (*iter), orig_new_expr);
  }
  }
}


[gcc r14-9814] aarch64: Fix whitespace in aarch64-ldp-fusion.cc:alias_walker

2024-04-05 Thread Alex Coplan via Gcc-cvs
https://gcc.gnu.org/g:e7d015b2506a1d9e84d9f7182e42e097147527e1

commit r14-9814-ge7d015b2506a1d9e84d9f7182e42e097147527e1
Author: Alex Coplan 
Date:   Fri Apr 5 17:53:47 2024 +0100

aarch64: Fix whitespace in aarch64-ldp-fusion.cc:alias_walker

I spotted this whitespace error during the review of
https://gcc.gnu.org/pipermail/gcc-patches/2024-April/648846.html

gcc/ChangeLog:

* config/aarch64/aarch64-ldp-fusion.cc (struct alias_walker):
Fix double space after const qualifier on valid ().

Diff:
---
 gcc/config/aarch64/aarch64-ldp-fusion.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-ldp-fusion.cc 
b/gcc/config/aarch64/aarch64-ldp-fusion.cc
index 22ed95eb743..365dcf48b22 100644
--- a/gcc/config/aarch64/aarch64-ldp-fusion.cc
+++ b/gcc/config/aarch64/aarch64-ldp-fusion.cc
@@ -2138,7 +2138,7 @@ struct alias_walker
 {
   virtual bool conflict_p (int &budget) const = 0;
   virtual insn_info *insn () const = 0;
-  virtual bool valid () const  = 0;
+  virtual bool valid () const = 0;
   virtual void advance () = 0;
 };