https://gcc.gnu.org/g:7fe86bb107f7f8c40820ce61572204307266618b

commit r16-4369-g7fe86bb107f7f8c40820ce61572204307266618b
Author: Andrew Pinski <[email protected]>
Date:   Wed Sep 10 22:05:26 2025 -0700

    forwprop: Add a simple DSE after a clobber
    
    After the copy propagation for aggregates patches, we might now end
    up with:
    ```
    tmp = a;
    b = a; // was b = tmp;
    tmp = {CLOBBER};
    ```
    To help out ESRA, it would be a good idea to remove the `tmp = a`
    statement as there is no DSE between forwprop and ESRA.
    copy-prop-aggregate-sra-1.c is an example where the removal of the
    copy helps ESRA.
    
    This adds a simple DSE which is only designed to remove the `tmp = a`
    statement.
    This shows up a few times in C++ code, including the JavaScript
    interpreter in Ladybird, in the "fake" testcase in PR 108653, and in
    the aarch64-specific PR 89967.
    
    This is disabled for -Og as we don't do DSE there either.
    The intent_optimize_10.f90 testcase needed to be updated as the
    constant now shows up in a debug statement.
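
    As a hedged illustration (the exact dump text may differ), the
    optimized dump can now contain a debug bind like:
    ```
    # DEBUG tc => 123456789
    ```
    which the old bare pattern "123456789" would have matched, while the
    new pattern "= 123456789" only matches an actual store of the
    constant.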
    
    Changes since v1:
    * v2: Add many more comments in the code instead of just relying on
          the commit message.
          Count the maybe_use towards the aliasing lookup limit (increase
          the non-full walk limit to 4 to account for that).
          Use a direct comparison instead of operand_equal_p since we are
          comparing against a DECL.
    
    Bootstrapped and tested on x86_64-linux-gnu.
    
    gcc/ChangeLog:
    
            * tree-ssa-forwprop.cc (do_simple_agr_dse): New function.
            (pass_forwprop::execute): Call do_simple_agr_dse for clobbers.
    
    gcc/testsuite/ChangeLog:
    
            * gfortran.dg/intent_optimize_10.f90: Update so -g won't fail.
            * gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c: New testcase.
    
    Signed-off-by: Andrew Pinski <[email protected]>

Diff:
---
 .../gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c    |  33 +++++++
 gcc/testsuite/gfortran.dg/intent_optimize_10.f90   |   2 +-
 gcc/tree-ssa-forwprop.cc                           | 106 ++++++++++++++++++++-
 3 files changed, 139 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c
new file mode 100644
index 000000000000..baabecf5d274
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-prop-aggregate-sra-1.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-forwprop1-details  -fdump-tree-esra-details " } */
+
+struct s1
+{
+  int t[1024];
+};
+
+struct s1 f(void);
+
+void g(int a, int b, int );
+void p(struct s1);
+static inline void p1(struct s1 inner)
+{
+  p(inner);
+}
+
+void h(struct s1 outer)
+{
+  p1(outer);
+  g(outer.t[0], outer.t[1], outer.t[2]);
+}
+/* Forwprop should be able to copy prop the copy of `inner = outer` to the call of p.
+   Also remove this copy. */
+
+/* { dg-final { scan-tree-dump-times "after previous" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "Removing dead store stmt inner = outer" 
1 "forwprop1" } } */
+
+/* The extra copy that was done by inlining is removed, so SRA should not decide
+   to scalarize inner or outer even for the 3 elements accessed afterwards.  */
+/* { dg-final { scan-tree-dump-times "Disqualifying inner" 2 "esra" } } */
+/* { dg-final { scan-tree-dump-times "Disqualifying outer" 1 "esra" } } */
+
diff --git a/gcc/testsuite/gfortran.dg/intent_optimize_10.f90 b/gcc/testsuite/gfortran.dg/intent_optimize_10.f90
index d8bc1bb3b7ba..214f04cf70dd 100644
--- a/gcc/testsuite/gfortran.dg/intent_optimize_10.f90
+++ b/gcc/testsuite/gfortran.dg/intent_optimize_10.f90
@@ -63,4 +63,4 @@ end program main
 
 ! There is a clobber for tc, so we should manage to optimize away the associated initialization constant (but not other
 ! initialization constants).
-! { dg-final { scan-tree-dump-not "123456789" "optimized" { target __OPTIMIZE__ } } }
+! { dg-final { scan-tree-dump-not "= 123456789" "optimized" { target __OPTIMIZE__ } } }
diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index 749708f05a29..68e80baa46c0 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -1769,6 +1769,108 @@ optimize_agr_copyprop (gimple *stmt)
     }
 }
 
+/* Simple DSE of the lhs from a clobber STMT.
+   This is used mostly to clean up after optimize_agr_copyprop and
+   to remove (exactly one) extra copy that might later on confuse SRA.
+   An example is:
+   ;; write to a and such.
+   b = a; // This statement is to be removed
+   b = {CLOBBER};
+   Here SRA would totally scalarize b (and therefore also a) because
+   of the extra copy, which is not desirable.  Removing the copy
+   allows SRA to move the scalarization of a further down or to avoid
+   it entirely.  */
+static void
+do_simple_agr_dse (gassign *stmt, bool full_walk)
+{
+  /* Don't do this at -Og as we want to keep around the copy
+     for debuggability.  */
+  if (optimize_debug)
+    return;
+  ao_ref read;
+  basic_block bb = gimple_bb (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  /* Only handle clobbers of a full decl.  */
+  if (!DECL_P (lhs))
+    return;
+  ao_ref_init (&read, lhs);
+  tree vuse = gimple_vuse (stmt);
+  unsigned limit = full_walk ? param_sccvn_max_alias_queries_per_access : 4;
+  while (limit)
+    {
+      gimple *ostmt = SSA_NAME_DEF_STMT (vuse);
+      /* Don't handle PHIs; just stop the walk there. */
+      if (is_a<gphi*>(ostmt) || gimple_nop_p (ostmt))
+       break;
+      basic_block obb = gimple_bb (ostmt);
+      /* If the defining statement does not dominate the clobber,
+        then it is not "simple" to detect if the definition is fully clobbered.  */
+      if (obb != bb && !dominated_by_p (CDI_DOMINATORS, bb, obb))
+       return;
+      gimple *use_stmt;
+      imm_use_iterator iter;
+      FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_vdef (ostmt))
+       {
+         basic_block ubb = gimple_bb (use_stmt);
+         if (stmt == use_stmt)
+           continue;
+         /* The use needs to dominate the clobber. */
+         if ((ubb != bb && !dominated_by_p (CDI_DOMINATORS, bb, ubb))
+             || ref_maybe_used_by_stmt_p (use_stmt, &read, false))
+           return;
+         /* Count the above alias lookup towards the limit. */
+         limit--;
+         if (limit == 0)
+           return;
+       }
+      vuse = gimple_vuse (ostmt);
+
+      /* If this is an assignment store to the clobbered decl,
+        then maybe remove it.  A call is not handled here as
+        the rhs will not make a difference for SRA. */
+      if (is_a <gassign*>(ostmt)
+         && gimple_store_p (ostmt)
+         && !gimple_clobber_p (ostmt)
+         && lhs == gimple_assign_lhs (ostmt))
+       {
+         /* Don't remove stores/statements that are needed for
+            non-call EH to work.  */
+         if (stmt_unremovable_because_of_non_call_eh_p (cfun, ostmt))
+           return;
+         /* If we delete a stmt that could throw, mark the block
+            in to_purge to cleanup afterwards.  */
+         if (stmt_could_throw_p (cfun, ostmt))
+           bitmap_set_bit (to_purge, obb->index);
+         gimple_stmt_iterator gsi = gsi_for_stmt (ostmt);
+         if (dump_file && (dump_flags & TDF_DETAILS))
+           {
+             fprintf (dump_file, "Removing dead store stmt ");
+             print_gimple_stmt (dump_file, ostmt, 0);
+             fprintf (dump_file, "\n");
+           }
+         unlink_stmt_vdef (ostmt);
+         release_defs (ostmt);
+         gsi_remove (&gsi, true);
+         statistics_counter_event (cfun, "delete dead store", 1);
+         /* Only remove the first prior store statement. */
+         return;
+       }
+      /* If the statement uses or maybe writes to the decl,
+        then nothing is to be removed.  We don't know whether
+        the write to the decl is a partial or a full one, so
+        we need to stop.  E.g. for:
+        b.c = a;
+        b = {CLOBBER};
+        it is easier to stop here rather than doing a full
+        partial DSE of the `b.c = a;` statement before the
+        clobber. */
+      if (stmt_may_clobber_ref_p_1 (ostmt, &read, false)
+         || ref_maybe_used_by_stmt_p (ostmt, &read, false))
+       return;
+      limit--;
+    }
+}
+
 /* Optimizes builtin memcmps for small constant sizes.
    GSI_P is the GSI for the call. STMT is the call itself.
    */
@@ -5495,7 +5597,9 @@ pass_forwprop::execute (function *fun)
                  {
                    tree rhs1 = gimple_assign_rhs1 (stmt);
                    enum tree_code code = gimple_assign_rhs_code (stmt);
-                   if (gimple_store_p (stmt))
+                   if (gimple_clobber_p (stmt))
+                     do_simple_agr_dse (as_a<gassign*>(stmt), full_walk);
+                   else if (gimple_store_p (stmt))
                      {
                        optimize_aggr_zeroprop (stmt, full_walk);
                        if (gimple_assign_load_p (stmt))
