Hi,
here is updated version of the patch to avoid streaming trees that are local.
I made it mostly to see what effect it have on collisions (as I think having
unmergeable trees in the SCC hash artificially increase collision rate for
trees derrived from this).

To my surprise the number are quite different.  Now I get on slightly older
unpatched tree:

[WPA] read 9572344 SCCs of average size 2.030230
[WPA] 19434056 tree bodies read in total
[WPA] tree SCC table: size 4194301, 2047117 elements, collision ratio: 0.784630
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3380248 SCCs, 2334141 collisions (0.690524)
[WPA] Merged 3368715 SCCs
[WPA] Merged 10190141 tree bodies
[WPA] Merged 2664380 types
[WPA] 1779189 types prevailed (4089309 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98918 elements, 1779274 
searches, 756421 collisions (ratio: 0.425129)
[WPA] GIMPLE canonical type pointer-map: 98918 elements, 4109561 searches
[WPA] # of input files: 1817
[WPA] Compression: 266574384 input bytes, 818483159 uncompressed bytes (ratio: 
3.070374)

While with the patch I get:

[WPA] read 9572334 SCCs of average size 2.030231
[WPA] 19434046 tree bodies read in total
[WPA] tree SCC table: size 4194301, 1827487 elements, collision ratio: 0.797434
[WPA] tree SCC max chain length 140 (size 1)
[WPA] Compared 3317907 SCCs, 2322035 collisions (0.699849)
[WPA] 465571 local SCCs
[WPA] Merged 3307884 SCCs
[WPA] Merged 10001740 tree bodies
[WPA] Merged 2612232 types
[WPA] 1831335 types prevailed (4257855 associated trees)
[WPA] GIMPLE canonical type table: size 262139, 98953 elements, 1831420 
searches, 619322 collisions (ratio: 0.338165)
[WPA] GIMPLE canonical type pointer-map: 98953 elements, 4287782 searches
[WPA] # of input files: 1817
[WPA] Compression: 264291248 input bytes, 816565244 uncompressed bytes (ratio: 
3.089642)

It seems that the number of local SCCs is somewhat low (465571) to affect
overall statistics (I think we however still need for on-disk merging and for
proper handling of anonymous types to decide mergeability early).

Main change was modification to type_in_anonymous_namespace_p to ignore
declarations with no context to patch around builtin types having wrong
flags.  I will double check that anonymous types are recognized correctly,
but we do have testsuite coverage for that, so probably they are.

Honza

Index: lto-streamer-out.c
===================================================================
--- lto-streamer-out.c  (revision 213121)
+++ lto-streamer-out.c  (working copy)
@@ -54,7 +54,49 @@ along with GCC; see the file COPYING3.
 #include "streamer-hooks.h"
 #include "cfgloop.h"
 #include "builtins.h"
+#include "print-tree.h"
 
+/* Return if T can never be shared across units.  */
+static bool
+unit_local_tree_p (tree t)
+{
+  switch (TREE_CODE (t))
+    {
+      case VAR_DECL:
+       /* Automatic variables are always unit local.  */
+       if (!TREE_STATIC (t) && !DECL_EXTERNAL (t)
+           && !DECL_HARD_REGISTER (t))
+         return true;
+       /* ... fall through ... */
+
+      case FUNCTION_DECL:
+       /* Non-public declarations are alwyas local. */
+       if (!TREE_PUBLIC (t))
+         return true;
+
+       /* Public definitions that would cause linker error if
+          appeared in other unit.  */
+       if (TREE_PUBLIC (t)
+           && !DECL_EXTERNAL (t)
+           && !DECL_WEAK (t))
+         return true;
+       return false;
+      case NAMESPACE_DECL:
+       return !TREE_PUBLIC (t);
+      case TRANSLATION_UNIT_DECL:
+       return true;
+      case PARM_DECL:
+      case RESULT_DECL:
+      case LABEL_DECL:
+      case SSA_NAME:
+       return true;
+      default:
+       if (TYPE_P (t)
+           && type_in_anonymous_namespace_p (t))
+         return true;
+       return false;
+    }
+}
 
 static void lto_write_tree (struct output_block*, tree, bool);
 
@@ -740,7 +782,12 @@ hash_tree (struct streamer_tree_cache_d
     if (!SIBLING) \
       hstate.add_int (0); \
     else if (streamer_tree_cache_lookup (cache, SIBLING, &ix)) \
-      hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \
+      { \
+        hashval_t h = streamer_tree_cache_get_hash (cache, ix); \
+       if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \
+         return STREAMER_TREE_CACHE_HASH_LOCAL; \
+        hstate.add_int (h); \
+      } \
     else if (map) \
       hstate.add_int (*map->get (SIBLING)); \
     else \
@@ -750,6 +797,8 @@ hash_tree (struct streamer_tree_cache_d
   /* Hash TS_BASE.  */
   enum tree_code code = TREE_CODE (t);
   hstate.add_int (code);
+  if (unit_local_tree_p (t))
+    return STREAMER_TREE_CACHE_HASH_LOCAL;
   if (!TYPE_P (t))
     {
       hstate.add_flag (TREE_SIDE_EFFECTS (t));
@@ -1136,7 +1185,9 @@ hash_tree (struct streamer_tree_cache_d
       visit (OMP_CLAUSE_CHAIN (t));
     }
 
-  return hstate.end ();
+  hashval_t v = hstate.end ();
+
+  return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL);
 
 #undef visit
 }
@@ -1166,8 +1217,16 @@ DFS::hash_scc (struct output_block *ob,
 
   /* Compute hash values for the SCC members.  */
   for (unsigned i = 0; i < size; ++i)
-    sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
-                                       sccstack[first+i].t);
+    {
+      sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL,
+                                         sccstack[first+i].t);
+      if (sccstack[first+i].hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+        {
+         for (unsigned i = 0; i < size; ++i)
+           sccstack[first+i].hash = STREAMER_TREE_CACHE_HASH_LOCAL;
+         return STREAMER_TREE_CACHE_HASH_LOCAL;
+       }
+    }
 
   if (size == 1)
     return sccstack[first].hash;
@@ -1255,6 +1314,8 @@ DFS::hash_scc (struct output_block *ob,
                  sccstack[first+i].hash
                    = iterative_hash_hashval_t (i,
                                                *map.get (sccstack[first+i].t));
+                 sccstack[first+i].hash += (sccstack[first+i].hash
+                                            == STREAMER_TREE_CACHE_HASH_LOCAL);
                  scc_hash = iterative_hash_hashval_t (scc_hash,
                                                       sccstack[first+i].hash);
                }
@@ -1283,7 +1344,7 @@ DFS::hash_scc (struct output_block *ob,
          for (unsigned i = 0; i < size; ++i)
            sccstack[first+i].hash
              = iterative_hash_hashval_t (sccstack[first+i].hash, scc_hash);
-         return scc_hash;
+         return scc_hash + (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL);
        }
 
       last_classes = classes;
@@ -1383,32 +1444,37 @@ DFS::DFS_write_tree (struct output_block
             any merging there.  */
          hashval_t scc_hash = 0;
          unsigned scc_entry_len = 0;
-         if (!flag_wpa)
+         if (!flag_wpa && !ob->symbol)
            {
              scc_hash = hash_scc (ob, first, size);
 
              /* Put the entries with the least number of collisions first.  */
-             unsigned entry_start = 0;
-             scc_entry_len = size + 1;
-             for (unsigned i = 0; i < size;)
+             if (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL)
                {
-                 unsigned from = i;
-                 for (i = i + 1; i < size
-                      && (sccstack[first + i].hash
-                          == sccstack[first + from].hash); ++i)
-                   ;
-                 if (i - from < scc_entry_len)
+                 unsigned entry_start = 0;
+                 scc_entry_len = size + 1;
+                 for (unsigned i = 0; i < size;)
                    {
-                     scc_entry_len = i - from;
-                     entry_start = from;
+                     unsigned from = i;
+                     for (i = i + 1; i < size
+                          && (sccstack[first + i].hash
+                              == sccstack[first + from].hash); ++i)
+                       ;
+                     if (i - from < scc_entry_len)
+                       {
+                         scc_entry_len = i - from;
+                         entry_start = from;
+                       }
+                   }
+                 for (unsigned i = 0; i < scc_entry_len; ++i)
+                   {
+                     scc_entry tem = sccstack[first + i];
+                     sccstack[first + i] = sccstack[first + entry_start + i];
+                     sccstack[first + entry_start + i] = tem;
                    }
                }
-             for (unsigned i = 0; i < scc_entry_len; ++i)
-               {
-                 scc_entry tem = sccstack[first + i];
-                 sccstack[first + i] = sccstack[first + entry_start + i];
-                 sccstack[first + entry_start + i] = tem;
-               }
+             else
+               scc_entry_len = 1;
 
              if (scc_entry_len == 1)
                ; /* We already sorted SCC deterministically in hash_scc.  */
@@ -1429,7 +1495,7 @@ DFS::DFS_write_tree (struct output_block
          streamer_write_uhwi (ob, scc_hash);
 
          /* Write size-1 SCCs without wrapping them inside SCC bundles.
-            All INTEGER_CSTs need to be handled this way as we need
+            All INTEGER_CSTs need thiso be handled this way as we need
             their type to materialize them.  Also builtins are handled
             this way.
             ???  We still wrap these in LTO_tree_scc so at the
@@ -1449,6 +1515,8 @@ DFS::DFS_write_tree (struct output_block
                  tree t = sccstack[first+i].t;
                  bool exists_p = streamer_tree_cache_insert (ob->writer_cache,
                                                              t, hash, &ix);
+                 gcc_assert (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
+                             || hash == STREAMER_TREE_CACHE_HASH_LOCAL);
                  gcc_assert (!exists_p);
 
                  if (!lto_is_streamable (t))
Index: lto-streamer.h
===================================================================
--- lto-streamer.h      (revision 213121)
+++ lto-streamer.h      (working copy)
@@ -1191,4 +1191,6 @@ DEFINE_DECL_STREAM_FUNCS (TYPE_DECL, typ
 DEFINE_DECL_STREAM_FUNCS (NAMESPACE_DECL, namespace_decl)
 DEFINE_DECL_STREAM_FUNCS (LABEL_DECL, label_decl)
 
+#define STREAMER_TREE_CACHE_HASH_LOCAL 1
+
 #endif /* GCC_LTO_STREAMER_H  */
Index: lto/lto.c
===================================================================
--- lto/lto.c   (revision 213121)
+++ lto/lto.c   (working copy)
@@ -1144,6 +1146,7 @@ static unsigned long num_prevailing_type
 static unsigned long num_type_scc_trees;
 static unsigned long total_scc_size;
 static unsigned long num_sccs_read;
+static unsigned long num_local_nodes;
 static unsigned long total_scc_size_merged;
 static unsigned long num_sccs_merged;
 static unsigned long num_scc_compares;
@@ -1725,9 +1728,7 @@ unify_scc (struct streamer_tree_cache_d
              && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t)))
          || TREE_CODE (t) == LABEL_DECL)
        {
-         /* Avoid doing any work for these cases and do not worry to
-            record the SCCs for further merging.  */
-         return false;
+         gcc_unreachable ();
        }
     }
 
@@ -1889,8 +1890,11 @@ lto_read_decls (struct lto_file_decl_dat
                  || streamer_handle_as_builtin_p (first)))
            continue;
 
+         if (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL)
+           num_local_nodes++;
+
          /* Try to unify the SCC with already existing ones.  */
-         if (!flag_ltrans
+         if (!flag_ltrans && scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL
              && unify_scc (data_in->reader_cache, from,
                            len, scc_entry_len, scc_hash))
            continue;
@@ -2710,13 +2714,10 @@ lto_fixup_prevailing_decls (tree t)
        {
          LTO_NO_PREVAIL (t->decl_with_vis.assembler_name);
        }
-      if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON))
-       {
-         LTO_NO_PREVAIL (DECL_RESULT_FLD (t));
-       }
       if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL))
        {
          LTO_NO_PREVAIL (DECL_ARGUMENTS (t));
+         LTO_NO_PREVAIL (DECL_RESULT (t));
          LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t));
          LTO_NO_PREVAIL (DECL_VINDEX (t));
        }
@@ -3180,6 +3181,7 @@ print_lto_report_1 (void)
       fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx,
               num_scc_compares, num_scc_compare_collisions,
               num_scc_compare_collisions / (double) num_scc_compares);
+      fprintf (stderr, "[%s] %lu local SCCs\n", pfx, num_local_nodes);
       fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged);
       fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx,
               total_scc_size_merged);

Reply via email to