Hi, here is updated version of the patch to avoid streaming trees that are local. I made it mostly to see what effect it have on collisions (as I think having unmergeable trees in the SCC hash artificially increase collision rate for trees derrived from this).
To my surprise the number are quite different. Now I get on slightly older unpatched tree: [WPA] read 9572344 SCCs of average size 2.030230 [WPA] 19434056 tree bodies read in total [WPA] tree SCC table: size 4194301, 2047117 elements, collision ratio: 0.784630 [WPA] tree SCC max chain length 140 (size 1) [WPA] Compared 3380248 SCCs, 2334141 collisions (0.690524) [WPA] Merged 3368715 SCCs [WPA] Merged 10190141 tree bodies [WPA] Merged 2664380 types [WPA] 1779189 types prevailed (4089309 associated trees) [WPA] GIMPLE canonical type table: size 262139, 98918 elements, 1779274 searches, 756421 collisions (ratio: 0.425129) [WPA] GIMPLE canonical type pointer-map: 98918 elements, 4109561 searches [WPA] # of input files: 1817 [WPA] Compression: 266574384 input bytes, 818483159 uncompressed bytes (ratio: 3.070374) While with the patch I get: [WPA] read 9572334 SCCs of average size 2.030231 [WPA] 19434046 tree bodies read in total [WPA] tree SCC table: size 4194301, 1827487 elements, collision ratio: 0.797434 [WPA] tree SCC max chain length 140 (size 1) [WPA] Compared 3317907 SCCs, 2322035 collisions (0.699849) [WPA] 465571 local SCCs [WPA] Merged 3307884 SCCs [WPA] Merged 10001740 tree bodies [WPA] Merged 2612232 types [WPA] 1831335 types prevailed (4257855 associated trees) [WPA] GIMPLE canonical type table: size 262139, 98953 elements, 1831420 searches, 619322 collisions (ratio: 0.338165) [WPA] GIMPLE canonical type pointer-map: 98953 elements, 4287782 searches [WPA] # of input files: 1817 [WPA] Compression: 264291248 input bytes, 816565244 uncompressed bytes (ratio: 3.089642) It seems that the number of local SCCs is somewhat low (465571) to affect overall statistics (I think we however still need for on-disk merging and for proper handling of anonymous types to decide mergeability early). Main change was modification to type_in_anonymous_namespace_p to ignore declarations with no context to patch around builtin types having wrong flags. I will double check that anonymous types are recognized correctly, but we do have testsuite coverage for that, so probably they are. Honza Index: lto-streamer-out.c =================================================================== --- lto-streamer-out.c (revision 213121) +++ lto-streamer-out.c (working copy) @@ -54,7 +54,49 @@ along with GCC; see the file COPYING3. #include "streamer-hooks.h" #include "cfgloop.h" #include "builtins.h" +#include "print-tree.h" +/* Return if T can never be shared across units. */ +static bool +unit_local_tree_p (tree t) +{ + switch (TREE_CODE (t)) + { + case VAR_DECL: + /* Automatic variables are always unit local. */ + if (!TREE_STATIC (t) && !DECL_EXTERNAL (t) + && !DECL_HARD_REGISTER (t)) + return true; + /* ... fall through ... */ + + case FUNCTION_DECL: + /* Non-public declarations are alwyas local. */ + if (!TREE_PUBLIC (t)) + return true; + + /* Public definitions that would cause linker error if + appeared in other unit. */ + if (TREE_PUBLIC (t) + && !DECL_EXTERNAL (t) + && !DECL_WEAK (t)) + return true; + return false; + case NAMESPACE_DECL: + return !TREE_PUBLIC (t); + case TRANSLATION_UNIT_DECL: + return true; + case PARM_DECL: + case RESULT_DECL: + case LABEL_DECL: + case SSA_NAME: + return true; + default: + if (TYPE_P (t) + && type_in_anonymous_namespace_p (t)) + return true; + return false; + } +} static void lto_write_tree (struct output_block*, tree, bool); @@ -740,7 +782,12 @@ hash_tree (struct streamer_tree_cache_d if (!SIBLING) \ hstate.add_int (0); \ else if (streamer_tree_cache_lookup (cache, SIBLING, &ix)) \ - hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \ + { \ + hashval_t h = streamer_tree_cache_get_hash (cache, ix); \ + if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \ + return STREAMER_TREE_CACHE_HASH_LOCAL; \ + hstate.add_int (h); \ + } \ else if (map) \ hstate.add_int (*map->get (SIBLING)); \ else \ @@ -750,6 +797,8 @@ hash_tree (struct streamer_tree_cache_d /* Hash TS_BASE. */ enum tree_code code = TREE_CODE (t); hstate.add_int (code); + if (unit_local_tree_p (t)) + return STREAMER_TREE_CACHE_HASH_LOCAL; if (!TYPE_P (t)) { hstate.add_flag (TREE_SIDE_EFFECTS (t)); @@ -1136,7 +1185,9 @@ hash_tree (struct streamer_tree_cache_d visit (OMP_CLAUSE_CHAIN (t)); } - return hstate.end (); + hashval_t v = hstate.end (); + + return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL); #undef visit } @@ -1166,8 +1217,16 @@ DFS::hash_scc (struct output_block *ob, /* Compute hash values for the SCC members. */ for (unsigned i = 0; i < size; ++i) - sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL, - sccstack[first+i].t); + { + sccstack[first+i].hash = hash_tree (ob->writer_cache, NULL, + sccstack[first+i].t); + if (sccstack[first+i].hash == STREAMER_TREE_CACHE_HASH_LOCAL) + { + for (unsigned i = 0; i < size; ++i) + sccstack[first+i].hash = STREAMER_TREE_CACHE_HASH_LOCAL; + return STREAMER_TREE_CACHE_HASH_LOCAL; + } + } if (size == 1) return sccstack[first].hash; @@ -1255,6 +1314,8 @@ DFS::hash_scc (struct output_block *ob, sccstack[first+i].hash = iterative_hash_hashval_t (i, *map.get (sccstack[first+i].t)); + sccstack[first+i].hash += (sccstack[first+i].hash + == STREAMER_TREE_CACHE_HASH_LOCAL); scc_hash = iterative_hash_hashval_t (scc_hash, sccstack[first+i].hash); } @@ -1283,7 +1344,7 @@ DFS::hash_scc (struct output_block *ob, for (unsigned i = 0; i < size; ++i) sccstack[first+i].hash = iterative_hash_hashval_t (sccstack[first+i].hash, scc_hash); - return scc_hash; + return scc_hash + (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL); } last_classes = classes; @@ -1383,32 +1444,37 @@ DFS::DFS_write_tree (struct output_block any merging there. */ hashval_t scc_hash = 0; unsigned scc_entry_len = 0; - if (!flag_wpa) + if (!flag_wpa && !ob->symbol) { scc_hash = hash_scc (ob, first, size); /* Put the entries with the least number of collisions first. */ - unsigned entry_start = 0; - scc_entry_len = size + 1; - for (unsigned i = 0; i < size;) + if (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL) { - unsigned from = i; - for (i = i + 1; i < size - && (sccstack[first + i].hash - == sccstack[first + from].hash); ++i) - ; - if (i - from < scc_entry_len) + unsigned entry_start = 0; + scc_entry_len = size + 1; + for (unsigned i = 0; i < size;) { - scc_entry_len = i - from; - entry_start = from; + unsigned from = i; + for (i = i + 1; i < size + && (sccstack[first + i].hash + == sccstack[first + from].hash); ++i) + ; + if (i - from < scc_entry_len) + { + scc_entry_len = i - from; + entry_start = from; + } + } + for (unsigned i = 0; i < scc_entry_len; ++i) + { + scc_entry tem = sccstack[first + i]; + sccstack[first + i] = sccstack[first + entry_start + i]; + sccstack[first + entry_start + i] = tem; } } - for (unsigned i = 0; i < scc_entry_len; ++i) - { - scc_entry tem = sccstack[first + i]; - sccstack[first + i] = sccstack[first + entry_start + i]; - sccstack[first + entry_start + i] = tem; - } + else + scc_entry_len = 1; if (scc_entry_len == 1) ; /* We already sorted SCC deterministically in hash_scc. */ @@ -1429,7 +1495,7 @@ DFS::DFS_write_tree (struct output_block streamer_write_uhwi (ob, scc_hash); /* Write size-1 SCCs without wrapping them inside SCC bundles. - All INTEGER_CSTs need to be handled this way as we need + All INTEGER_CSTs need thiso be handled this way as we need their type to materialize them. Also builtins are handled this way. ??? We still wrap these in LTO_tree_scc so at the @@ -1449,6 +1515,8 @@ DFS::DFS_write_tree (struct output_block tree t = sccstack[first+i].t; bool exists_p = streamer_tree_cache_insert (ob->writer_cache, t, hash, &ix); + gcc_assert (scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL + || hash == STREAMER_TREE_CACHE_HASH_LOCAL); gcc_assert (!exists_p); if (!lto_is_streamable (t)) Index: lto-streamer.h =================================================================== --- lto-streamer.h (revision 213121) +++ lto-streamer.h (working copy) @@ -1191,4 +1191,6 @@ DEFINE_DECL_STREAM_FUNCS (TYPE_DECL, typ DEFINE_DECL_STREAM_FUNCS (NAMESPACE_DECL, namespace_decl) DEFINE_DECL_STREAM_FUNCS (LABEL_DECL, label_decl) +#define STREAMER_TREE_CACHE_HASH_LOCAL 1 + #endif /* GCC_LTO_STREAMER_H */ Index: lto/lto.c =================================================================== --- lto/lto.c (revision 213121) +++ lto/lto.c (working copy) @@ -1144,6 +1146,7 @@ static unsigned long num_prevailing_type static unsigned long num_type_scc_trees; static unsigned long total_scc_size; static unsigned long num_sccs_read; +static unsigned long num_local_nodes; static unsigned long total_scc_size_merged; static unsigned long num_sccs_merged; static unsigned long num_scc_compares; @@ -1725,9 +1728,7 @@ unify_scc (struct streamer_tree_cache_d && !(TREE_PUBLIC (t) || DECL_EXTERNAL (t))) || TREE_CODE (t) == LABEL_DECL) { - /* Avoid doing any work for these cases and do not worry to - record the SCCs for further merging. */ - return false; + gcc_unreachable (); } } @@ -1889,8 +1890,11 @@ lto_read_decls (struct lto_file_decl_dat || streamer_handle_as_builtin_p (first))) continue; + if (scc_hash == STREAMER_TREE_CACHE_HASH_LOCAL) + num_local_nodes++; + /* Try to unify the SCC with already existing ones. */ - if (!flag_ltrans + if (!flag_ltrans && scc_hash != STREAMER_TREE_CACHE_HASH_LOCAL && unify_scc (data_in->reader_cache, from, len, scc_entry_len, scc_hash)) continue; @@ -2710,13 +2714,10 @@ lto_fixup_prevailing_decls (tree t) { LTO_NO_PREVAIL (t->decl_with_vis.assembler_name); } - if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON)) - { - LTO_NO_PREVAIL (DECL_RESULT_FLD (t)); - } if (CODE_CONTAINS_STRUCT (code, TS_FUNCTION_DECL)) { LTO_NO_PREVAIL (DECL_ARGUMENTS (t)); + LTO_NO_PREVAIL (DECL_RESULT (t)); LTO_SET_PREVAIL (DECL_FUNCTION_PERSONALITY (t)); LTO_NO_PREVAIL (DECL_VINDEX (t)); } @@ -3180,6 +3181,7 @@ print_lto_report_1 (void) fprintf (stderr, "[%s] Compared %lu SCCs, %lu collisions (%f)\n", pfx, num_scc_compares, num_scc_compare_collisions, num_scc_compare_collisions / (double) num_scc_compares); + fprintf (stderr, "[%s] %lu local SCCs\n", pfx, num_local_nodes); fprintf (stderr, "[%s] Merged %lu SCCs\n", pfx, num_sccs_merged); fprintf (stderr, "[%s] Merged %lu tree bodies\n", pfx, total_scc_size_merged);