On 24/12/15 11:53, Alan Lawrence wrote:
Here's a new version that fixes the gcc.dg/guality/pr54970.c failures seen on
aarch64 and powerpc64. Prior to SRA handling constant pool decls,
-fdump-tree-esra-details (at -O1 -g) had shown:
   <bb 2>:
   a = *.LC0;
   # DEBUG a$0 => MEM[(int[3] *)&*.LC0]
   a$4_3 = MEM[(int[3] *)&*.LC0 + 4B];
   # DEBUG a$4 => a$4_3
   a$8_4 = MEM[(int[3] *)&*.LC0 + 8B];

The previous patch changed this to:
   <bb 2>:
   SR.5_3 = *.LC0[0];
   SR.6_4 = *.LC0[1];
   SR.7_19 = *.LC0[2];
   SR.8_21 = *.LC1[0];
   SR.9_22 = *.LC1[1];
   SR.10_23 = *.LC1[2];
   # DEBUG a$0 => NULL   // Note here
   a$4_24 = SR.6_4;
   # DEBUG a$4 => a$4_24
   a$8_25 = SR.7_19;

Turns out the DEBUG a$0 => NULL was produced in load_assign_lhs_subreplacements:

          if (lacc && lacc->grp_to_be_debug_replaced)
            {
              gdebug *ds;
              tree drhs;
              struct access *racc = find_access_in_subtree (sad->top_racc,
                                                            offset,
                                                            lacc->size);

              if (racc && racc->grp_to_be_replaced)
                {
                  if (racc->grp_write)
                    drhs = get_access_replacement (racc);
                  else
                    drhs = NULL;  // <=== HERE
                }
...
              ds = gimple_build_debug_bind (get_access_replacement (lacc),
                                            drhs, gsi_stmt (sad->old_gsi));

Prior to the patch, we'd skipped around load_assign_lhs_subreplacements, because
access_has_children_p(racc) (for racc = *.LC0) didn't hold in sra_modify_assign.

I also added a constant_decl_p function, combining the two checks, plus some
testcase fixes.

Bootstrapped + check-gcc,g++ on x86_64, ARM, AArch64,
   also on powerpc64{,le}-none-linux-gnu *in combination with the other patches
   in the series* (I haven't tested the individual patches on PPC),
   plus Ada on ARM and x86_64.

gcc/ChangeLog:

        PR target/63679
        * tree-sra.c (disqualified_constants, constant_decl_p): New.
        (sra_initialize): Allocate disqualified_constants.
        (sra_deinitialize): Free disqualified_constants.
        (disqualify_candidate): Update disqualified_constants when appropriate.
        (create_access): Scan for constant-pool entries as we go along.
        (scalarizable_type_p): Add check against type_contains_placeholder_p.
        (maybe_add_sra_candidate): Allow constant-pool entries.
        (load_assign_lhs_subreplacements): Bind debug for constant pool vars.
        (initialize_constant_pool_replacements): New.
        (sra_modify_assign): Avoid mangling assignments created by previous,
        and don't generate writes into constant pool.
        (sra_modify_function_body): Call initialize_constant_pool_replacements.

gcc/testsuite/ChangeLog:

        * gcc.dg/tree-ssa/sra-17.c: New.
        * gcc.dg/tree-ssa/sra-18.c: New.

Ping.

(The next bit is false, unless you force SRA to happen more widely, but all the above stands)

This also fixes a bunch of other guality tests on AArch64 that were failing
prior to the patch series, and another bunch on PowerPC64 (bigendian -m32), 
listed below.
>
Tests fixed on aarch64-none-linux-gnu:
gcc.dg/guality/pr54970.c   -O1  line 15 *p == 3
gcc.dg/guality/pr54970.c   -O1  line 15 *q == 2
gcc.dg/guality/pr54970.c   -O1  line 20 *p == 13
gcc.dg/guality/pr54970.c   -O1  line 20 *q == 2
gcc.dg/guality/pr54970.c   -O1  line 25 *p == 13
gcc.dg/guality/pr54970.c   -O1  line 25 *q == 12
gcc.dg/guality/pr54970.c   -O1  line 31 *p == 6
gcc.dg/guality/pr54970.c   -O1  line 31 *q == 5
gcc.dg/guality/pr54970.c   -O1  line 36 *p == 26
gcc.dg/guality/pr54970.c   -O1  line 36 *q == 5
gcc.dg/guality/pr54970.c   -O1  line 45 *p == 26
gcc.dg/guality/pr54970.c   -O1  line 45 *q == 25
gcc.dg/guality/pr54970.c   -O1  line 45 p[-1] == 25
gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
gcc.dg/guality/pr54970.c   -O1  line 45 q[1] == 26
gcc.dg/guality/pr56154-1.c   -O1  line pr56154-1.c:20 x.a == 6
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s1.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.f == 0.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:17 s2.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s1.g == 6.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.f == 5.0
gcc.dg/guality/pr59776.c   -O1  line pr59776.c:20 s2.g == 6.0
gcc.dg/guality/sra-1.c   -O1  line 21 a.j == 14
gcc.dg/guality/sra-1.c   -O1  line 32 a[1] == 14
gcc.dg/guality/sra-1.c   -O1  line 43 a.i == 4
gcc.dg/guality/sra-1.c   -O1  line 43 a.j == 14
(and other optimization levels)
On ppc64 bigendian, with the rest of the series (but I expect it's this patch):
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 15 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 20 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 25 a[0] == 1
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 31 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 36 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 a[0] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 p[-2] == 4
unix/-m32: gcc.dg/guality/pr54970.c   -O1  line 45 q[-1] == 4
(again, and other optimization levels).

Cheers, Alan

---
  gcc/testsuite/gcc.dg/tree-ssa/sra-17.c |  19 ++++++
  gcc/testsuite/gcc.dg/tree-ssa/sra-18.c |  28 +++++++++
  gcc/tree-sra.c                         | 104 +++++++++++++++++++++++++++++++--
  3 files changed, 147 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/sra-18.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c 
b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
new file mode 100644
index 0000000..25667f4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-17.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* 
powerpc*-*-* s390*-*-* } } } */
+/* { dg-options "-O2 -fdump-tree-esra --param 
sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+
+int
+main (int argc, char **argv)
+{
+  long a[4] = { 7, 19, 11, 255 };
+  int tot = 0;
+  for (int i = 0; i < 4; i++)
+    tot = (tot*256) + a[i];
+  if (tot == 0x07130bff)
+    return 0;
+  abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } 
} */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = \\\*.LC0\\\[" 4 "esra" } 
} */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c 
b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
new file mode 100644
index 0000000..609fb11
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/sra-18.c
@@ -0,0 +1,28 @@
+/* { dg-do run { target { aarch64*-*-* alpha*-*-* arm*-*-* hppa*-*-* 
powerpc*-*-* s390*-*-* } } } */
+/* { dg-options "-O2 -fdump-tree-esra --param 
sra-max-scalarization-size-Ospeed=32" } */
+
+extern void abort (void);
+struct foo { long x; };
+
+struct bar { struct foo f[2]; };
+
+struct baz { struct bar b[2]; };
+
+int
+main (int argc, char **argv)
+{
+  struct baz a = { { { { { 4 }, { 5 } } }, { { { 6 }, { 7 } } }  } };
+  int tot = 0;
+  for (int i = 0; i < 2; i++)
+    for (int j = 0; j < 2; j++)
+      tot = (tot*256) + a.b[i].f[j].x;
+  if (tot == 0x04050607)
+    return 0;
+  abort ();
+}
+
+/* { dg-final { scan-tree-dump-times "Removing load: a = \\\*.LC0;" 1 "esra" } 
} */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = 
\\\*.LC0\\.b\\\[0\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = 
\\\*.LC0\\.b\\\[0\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = 
\\\*.LC0\\.b\\\[1\\\]\\.f\\\[0\\\]\\.x" 1 "esra" } } */
+/* { dg-final { scan-tree-dump-times "SR.\[0-9_\]+ = 
\\\*.LC0\\.b\\\[1\\\]\\.f\\\[1\\\]\\.x" 1 "esra" } } */
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index c4fea5b..77a2e49 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -328,6 +328,10 @@ candidate (unsigned uid)
     those which cannot be (because they are and need be used as a whole).  */
  static bitmap should_scalarize_away_bitmap, cannot_scalarize_away_bitmap;

+/* Bitmap of candidates in the constant pool, which cannot be scalarized
+   because this would produce non-constant expressions (e.g. Ada).  */
+static bitmap disqualified_constants;
+
  /* Obstack for creation of fancy names.  */
  static struct obstack name_obstack;

@@ -652,6 +656,7 @@ sra_initialize (void)
      (vec_safe_length (cfun->local_decls) / 2);
    should_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
    cannot_scalarize_away_bitmap = BITMAP_ALLOC (NULL);
+  disqualified_constants = BITMAP_ALLOC (NULL);
    gcc_obstack_init (&name_obstack);
    base_access_vec = new hash_map<tree, auto_vec<access_p> >;
    memset (&sra_stats, 0, sizeof (sra_stats));
@@ -670,6 +675,7 @@ sra_deinitialize (void)
    candidates = NULL;
    BITMAP_FREE (should_scalarize_away_bitmap);
    BITMAP_FREE (cannot_scalarize_away_bitmap);
+  BITMAP_FREE (disqualified_constants);
    access_pool.release ();
    assign_link_pool.release ();
    obstack_free (&name_obstack, NULL);
@@ -677,6 +683,13 @@ sra_deinitialize (void)
    delete base_access_vec;
  }

+/* Return true if DECL is a VAR_DECL in the constant pool, false otherwise.  */
+
+static bool constant_decl_p (tree decl)
+{
+  return TREE_CODE (decl) == VAR_DECL && DECL_IN_CONSTANT_POOL (decl);
+}
+
  /* Remove DECL from candidates for SRA and write REASON to the dump file if
     there is one.  */
  static void
@@ -684,6 +697,8 @@ disqualify_candidate (tree decl, const char *reason)
  {
    if (bitmap_clear_bit (candidate_bitmap, DECL_UID (decl)))
      candidates->remove_elt_with_hash (decl, DECL_UID (decl));
+  if (constant_decl_p (decl))
+    bitmap_set_bit (disqualified_constants, DECL_UID (decl));

    if (dump_file && (dump_flags & TDF_DETAILS))
      {
@@ -835,8 +850,11 @@ create_access_1 (tree base, HOST_WIDE_INT offset, 
HOST_WIDE_INT size)
    return access;
  }

+static bool maybe_add_sra_candidate (tree);
+
  /* Create and insert access for EXPR. Return created access, or NULL if it is
-   not possible.  */
+   not possible.  Also scan for uses of constant pool as we go along and add
+   to candidates.  */

  static struct access *
  create_access (tree expr, gimple *stmt, bool write)
@@ -859,6 +877,25 @@ create_access (tree expr, gimple *stmt, bool write)
    else
      ptr = false;

+  /* For constant-pool entries, check we can substitute the constant value.  */
+  if (constant_decl_p (base)
+      && (sra_mode == SRA_MODE_EARLY_INTRA || sra_mode == SRA_MODE_INTRA))
+    {
+      gcc_assert (!bitmap_bit_p (disqualified_constants, DECL_UID (base)));
+      if (expr != base
+         && !is_gimple_reg_type (TREE_TYPE (expr))
+         && dump_file && (dump_flags & TDF_DETAILS))
+       {
+         /* This occurs in Ada with accesses to ARRAY_RANGE_REFs,
+            and elements of multidimensional arrays (which are
+            multi-element arrays in their own right).  */
+         fprintf (dump_file, "Allowing non-reg-type load of part"
+                             " of constant-pool entry: ");
+         print_generic_expr (dump_file, expr, 0);
+       }
+      maybe_add_sra_candidate (base);
+    }
+
    if (!DECL_P (base) || !bitmap_bit_p (candidate_bitmap, DECL_UID (base)))
      return NULL;

@@ -918,6 +955,8 @@ static bool
  scalarizable_type_p (tree type)
  {
    gcc_assert (!is_gimple_reg_type (type));
+  if (type_contains_placeholder_p (type))
+    return false;

    switch (TREE_CODE (type))
    {
@@ -1852,7 +1891,10 @@ maybe_add_sra_candidate (tree var)
        reject (var, "not aggregate");
        return false;
      }
-  if (needs_to_live_in_memory (var))
+  /* Allow constant-pool entries (that "need to live in memory")
+     unless we are doing IPA SRA.  */
+  if (needs_to_live_in_memory (var)
+      && (sra_mode == SRA_MODE_EARLY_IPA || !constant_decl_p (var)))
      {
        reject (var, "needs to live in memory");
        return false;
@@ -3113,7 +3155,7 @@ load_assign_lhs_subreplacements (struct access *lacc,

              if (racc && racc->grp_to_be_replaced)
                {
-                 if (racc->grp_write)
+                 if (racc->grp_write || constant_decl_p (racc->base))
                    drhs = get_access_replacement (racc);
                  else
                    drhs = NULL;
@@ -3272,6 +3314,9 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator 
*gsi)
    racc = get_access_for_expr (rhs);
    if (!lacc && !racc)
      return SRA_AM_NONE;
+  /* Avoid modifying initializations of constant-pool replacements.  */
+  if (racc && (racc->replacement_decl == lhs))
+    return SRA_AM_NONE;

    loc = gimple_location (stmt);
    if (lacc && lacc->grp_to_be_replaced)
@@ -3388,7 +3433,8 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator 
*gsi)
        || contains_vce_or_bfcref_p (lhs)
        || stmt_ends_bb_p (stmt))
      {
-      if (access_has_children_p (racc))
+      /* No need to copy into a constant-pool, it comes pre-initialized.  */
+      if (access_has_children_p (racc) && !constant_decl_p (racc->base))
        generate_subtree_copies (racc->first_child, rhs, racc->offset, 0, 0,
                                 gsi, false, false, loc);
        if (access_has_children_p (lacc))
@@ -3491,6 +3537,54 @@ sra_modify_assign (gimple *stmt, gimple_stmt_iterator 
*gsi)
      }
  }

+/* Set any scalar replacements of values in the constant pool to the initial
+   value of the constant.  (Constant-pool decls like *.LC0 have effectively
+   been initialized before the program starts, we must do the same for their
+   replacements.)  Thus, we output statements like 'SR.1 = *.LC0[0];' into
+   the function's entry block.  */
+
+static void
+initialize_constant_pool_replacements (void)
+{
+  gimple_seq seq = NULL;
+  gimple_stmt_iterator gsi = gsi_start (seq);
+  bitmap_iterator bi;
+  unsigned i;
+
+  EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
+    if (bitmap_bit_p (should_scalarize_away_bitmap, i)
+       && !bitmap_bit_p (cannot_scalarize_away_bitmap, i))
+      {
+       tree var = candidate (i);
+       if (!constant_decl_p (var))
+         continue;
+       vec<access_p> *access_vec = get_base_access_vector (var);
+       if (!access_vec)
+         continue;
+       for (unsigned i = 0; i < access_vec->length (); i++)
+         {
+           struct access *access = (*access_vec)[i];
+           if (!access->replacement_decl)
+             continue;
+           gassign *stmt = gimple_build_assign (
+             get_access_replacement (access), unshare_expr (access->expr));
+           if (dump_file && (dump_flags & TDF_DETAILS))
+             {
+               fprintf (dump_file, "Generating constant initializer: ");
+               print_gimple_stmt (dump_file, stmt, 0, 1);
+               fprintf (dump_file, "\n");
+             }
+           gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
+           update_stmt (stmt);
+         }
+      }
+
+  seq = gsi_seq (gsi);
+  if (seq)
+    gsi_insert_seq_on_edge_immediate (
+      single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)), seq);
+}
+
  /* Traverse the function body and all modifications as decided in
     analyze_all_variable_accesses.  Return true iff the CFG has been
     changed.  */
@@ -3501,6 +3595,8 @@ sra_modify_function_body (void)
    bool cfg_changed = false;
    basic_block bb;

+  initialize_constant_pool_replacements ();
+
    FOR_EACH_BB_FN (bb, cfun)
      {
        gimple_stmt_iterator gsi = gsi_start_bb (bb);


Reply via email to