Re: Avoid multiple entry SCC regions
Hi, here is updated version of the patch to avoid streaming trees that are local. I made it mostly to see what effect it have on collisions (as I think having unmergeable trees in the SCC hash artificially increase collision rate for trees derrived from this). To my surprise the number are quite different. Now I get on slightly older unpatched tree: [WPA] read 9572344 SCCs of average size 2.030230 [WPA] 19434056 tree bodies read in total [WPA] tree SCC table: size 4194301, 2047117 elements, collision ratio: 0.784630 [WPA] tree SCC max chain length 140 (size 1) [WPA] Compared 3380248 SCCs, 2334141 collisions (0.690524) [WPA] Merged 3368715 SCCs [WPA] Merged 10190141 tree bodies [WPA] Merged 2664380 types [WPA] 1779189 types prevailed (4089309 associated trees) [WPA] GIMPLE canonical type table: size 262139, 98918 elements, 1779274 searches, 756421 collisions (ratio: 0.425129) [WPA] GIMPLE canonical type pointer-map: 98918 elements, 4109561 searches [WPA] # of input files: 1817 [WPA] Compression: 266574384 input bytes, 818483159 uncompressed bytes (ratio: 3.070374) While with the patch I get: [WPA] read 9572334 SCCs of average size 2.030231 [WPA] 19434046 tree bodies read in total [WPA] tree SCC table: size 4194301, 1827487 elements, collision ratio: 0.797434 [WPA] tree SCC max chain length 140 (size 1) [WPA] Compared 3317907 SCCs, 2322035 collisions (0.699849) [WPA] 465571 local SCCs [WPA] Merged 3307884 SCCs [WPA] Merged 10001740 tree bodies [WPA] Merged 2612232 types [WPA] 1831335 types prevailed (4257855 associated trees) [WPA] GIMPLE canonical type table: size 262139, 98953 elements, 1831420 searches, 619322 collisions (ratio: 0.338165) [WPA] GIMPLE canonical type pointer-map: 98953 elements, 4287782 searches [WPA] # of input files: 1817 [WPA] Compression: 264291248 input bytes, 816565244 uncompressed bytes (ratio: 3.089642) It seems that the number of local SCCs is somewhat low (465571) to affect overall statistics (I think we however still need for on-disk merging and for proper handling of anonymous types to decide mergeability early). Main change was modification to type_in_anonymous_namespace_p to ignore declarations with no context to patch around builtin types having wrong flags. I will double check that anonymous types are recognized correctly, but we do have testsuite coverage for that, so probably they are. Honza Index: lto-streamer-out.c === --- lto-streamer-out.c (revision 213121) +++ lto-streamer-out.c (working copy) @@ -54,7 +54,49 @@ along with GCC; see the file COPYING3. #include streamer-hooks.h #include cfgloop.h #include builtins.h +#include print-tree.h +/* Return if T can never be shared across units. */ +static bool +unit_local_tree_p (tree t) +{ + switch (TREE_CODE (t)) +{ + case VAR_DECL: + /* Automatic variables are always unit local. */ + if (!TREE_STATIC (t) !DECL_EXTERNAL (t) +!DECL_HARD_REGISTER (t)) + return true; + /* ... fall through ... */ + + case FUNCTION_DECL: + /* Non-public declarations are alwyas local. */ + if (!TREE_PUBLIC (t)) + return true; + + /* Public definitions that would cause linker error if + appeared in other unit. */ + if (TREE_PUBLIC (t) +!DECL_EXTERNAL (t) +!DECL_WEAK (t)) + return true; + return false; + case NAMESPACE_DECL: + return !TREE_PUBLIC (t); + case TRANSLATION_UNIT_DECL: + return true; + case PARM_DECL: + case RESULT_DECL: + case LABEL_DECL: + case SSA_NAME: + return true; + default: + if (TYPE_P (t) +type_in_anonymous_namespace_p (t)) + return true; + return false; +} +} static void lto_write_tree (struct output_block*, tree, bool); @@ -740,7 +782,12 @@ hash_tree (struct streamer_tree_cache_d if (!SIBLING) \ hstate.add_int (0); \ else if (streamer_tree_cache_lookup (cache, SIBLING, ix)) \ - hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \ + { \ +hashval_t h = streamer_tree_cache_get_hash (cache, ix); \ + if (h == STREAMER_TREE_CACHE_HASH_LOCAL) \ + return STREAMER_TREE_CACHE_HASH_LOCAL; \ +hstate.add_int (h); \ + } \ else if (map) \ hstate.add_int (*map-get (SIBLING)); \ else \ @@ -750,6 +797,8 @@ hash_tree (struct streamer_tree_cache_d /* Hash TS_BASE. */ enum tree_code code = TREE_CODE (t); hstate.add_int (code); + if (unit_local_tree_p (t)) +return STREAMER_TREE_CACHE_HASH_LOCAL; if (!TYPE_P (t)) { hstate.add_flag (TREE_SIDE_EFFECTS (t)); @@ -1136,7 +1185,9 @@ hash_tree (struct streamer_tree_cache_d visit (OMP_CLAUSE_CHAIN (t)); } - return hstate.end (); + hashval_t v = hstate.end (); + + return v + (v == STREAMER_TREE_CACHE_HASH_LOCAL); #undef visit } @@ -1166,8 +1217,16 @@
Avoid multiple entry SCC regions
Hi, this patch make SCC hashes stronger by using DFS to get entry point independent order. The DFS refactoring work is Richards, only hash_tree changes are mine and I am responsible for all bugs :) Details are hopefully well documented in hash_scc. I tested the patch on Firefox and we manage to get rid of all the duplicates within SCC regions that allows further simplification downstream on SCC streaming and merging. I am lto bootstrapping/regtesting x86_64-linux and intend to comming once it passes. Honza Richard Biener rguent...@suse.de Jan Hubicka hubi...@ucw.cz * lto-streamer-out.c (struct sccs): Turn to ... (class DFS): ... this one; refactor the DFS walk so it can be re-done on per-SCC basis. (DFS::DFS): New constructor. (DFS::~DFS): New destructor. (hash_tree): Add new MAP argument holding in-SCC hash values; remove POINTER_TYPE hashing hack. (scc_entry_compare): Rename to ... (DFS::scc_entry_compare): ... this one. (hash_scc): Rename to ... (DFS::hash_scc): ... this one; pass output_block instead of streamer_cache; work harder to get unique and stable SCC hashes. (DFS_write_tree): Rename to ... (DFS::DFS_write_tree): ... this one; add SINGLE_P parameter. (lto_output_tree): Update. Index: lto-streamer-out.c === --- lto-streamer-out.c (revision 212994) +++ lto-streamer-out.c (working copy) @@ -439,36 +440,71 @@ lto_output_tree_1 (struct output_block * } } -struct sccs +class DFS { - unsigned int dfsnum; - unsigned int low; +public: + DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, + bool single_p); + ~DFS (); + + struct scc_entry + { +tree t; +hashval_t hash; + }; + vecscc_entry sccstack; + +private: + struct sccs + { +unsigned int dfsnum; +unsigned int low; + }; + + static int scc_entry_compare (const void *, const void *); + + void DFS_write_tree_body (struct output_block *ob, + tree expr, sccs *expr_state, bool ref_p, + bool single_p); + + void DFS_write_tree (struct output_block *ob, sccs *from_state, + tree expr, bool ref_p, bool this_ref_p, + bool single_p); + hashval_t + hash_scc (struct output_block *ob, unsigned first, unsigned size); + + unsigned int next_dfs_num; + struct pointer_map_t *sccstate; + struct obstack sccstate_obstack; }; -struct scc_entry +DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, + bool single_p) { - tree t; - hashval_t hash; -}; - -static unsigned int next_dfs_num; -static vecscc_entry sccstack; -static struct pointer_map_t *sccstate; -static struct obstack sccstate_obstack; + sccstack.create (0); + sccstate = pointer_map_create (); + gcc_obstack_init (sccstate_obstack); + next_dfs_num = 1; + DFS_write_tree (ob, NULL, expr, ref_p, this_ref_p, single_p); +} -static void -DFS_write_tree (struct output_block *ob, sccs *from_state, - tree expr, bool ref_p, bool this_ref_p); +DFS::~DFS () +{ + sccstack.release (); + pointer_map_destroy (sccstate); + obstack_free (sccstate_obstack, NULL); +} /* Handle the tree EXPR in the DFS walk with SCC state EXPR_STATE and DFS recurse for all tree edges originating from it. */ -static void -DFS_write_tree_body (struct output_block *ob, -tree expr, sccs *expr_state, bool ref_p) +void +DFS::DFS_write_tree_body (struct output_block *ob, + tree expr, sccs *expr_state, bool ref_p, + bool single_p) { #define DFS_follow_tree_edge(DEST) \ - DFS_write_tree (ob, expr_state, DEST, ref_p, ref_p) + DFS_write_tree (ob, expr_state, DEST, ref_p, ref_p, single_p) enum tree_code code; @@ -689,16 +725,25 @@ DFS_write_tree_body (struct output_block #undef DFS_follow_tree_edge } -/* Return a hash value for the tree T. */ +/* Return a hash value for the tree T. + CACHE holds hash values of trees outside current SCC. MAP, if non-NULL, + may hold hash values if trees inside current SCC. */ static hashval_t -hash_tree (struct streamer_tree_cache_d *cache, tree t) +hash_tree (struct streamer_tree_cache_d *cache, hash_maptree, hashval_t *map, tree t) { #define visit(SIBLING) \ do { \ unsigned ix; \ -if (SIBLING streamer_tree_cache_lookup (cache, SIBLING, ix)) \ - v = iterative_hash_hashval_t (streamer_tree_cache_get_hash (cache, ix), v); \ +if (!SIBLING) \ + v = iterative_hash_hashval_t (0, v); \ +else if (streamer_tree_cache_lookup (cache, SIBLING, ix)) \ + v = iterative_hash_hashval_t (streamer_tree_cache_get_hash (cache, ix), \ + v); \ +else if (map) \ + v = iterative_hash_hashval_t (*map-get (SIBLING), v); \ +else \ + v =
Re: Avoid multiple entry SCC regions
Jan Hubicka hubi...@ucw.cz writes: I am lto bootstrapping/regtesting x86_64-linux and intend to comming once it passes. You'll have to redo it with hstates, sorry, as it conflicts with my patchkit which I checked in earlier. -Andi -- a...@linux.intel.com -- Speaking for myself only
Re: Avoid multiple entry SCC regions
Jan Hubicka hubi...@ucw.cz writes: I am lto bootstrapping/regtesting x86_64-linux and intend to comming once it passes. You'll have to redo it with hstates, sorry, as it conflicts with my patchkit which I checked in earlier. Yep I noticed that earlier and I am testing updated patch. I think your incremental hashing should be also use within scc_hash calculation, but I will leave that for incremnetal patch. Honza -Andi -- a...@linux.intel.com -- Speaking for myself only
Re: Avoid multiple entry SCC regions
Jan Hubicka hubi...@ucw.cz writes: I am lto bootstrapping/regtesting x86_64-linux and intend to comming once it passes. You'll have to redo it with hstates, sorry, as it conflicts with my patchkit which I checked in earlier. Hi, this is vairant of patch I comitted. Richard Biener rguent...@suse.de Jan Hubicka hubi...@ucw.cz * lto-streamer-out.c (struct sccs): Turn to ... (class DFS): ... this one; refactor the DFS walk so it can be re-done on per-SCC basis. (DFS::DFS): New constructor. (DFS::~DFS): New destructor. (hash_tree): Add new MAP argument holding in-SCC hash values; remove POINTER_TYPE hashing hack. (scc_entry_compare): Rename to ... (DFS::scc_entry_compare): ... this one. (hash_scc): Rename to ... (DFS::hash_scc): ... this one; pass output_block instead of streamer_cache; work harder to get unique and stable SCC hashes. (DFS_write_tree): Rename to ... (DFS::DFS_write_tree): ... this one; add SINGLE_P parameter. (lto_output_tree): Update. Index: lto-streamer-out.c === --- lto-streamer-out.c (revision 213058) +++ lto-streamer-out.c (working copy) @@ -440,36 +440,71 @@ lto_output_tree_1 (struct output_block * } } -struct sccs +class DFS { - unsigned int dfsnum; - unsigned int low; +public: + DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, + bool single_p); + ~DFS (); + + struct scc_entry + { +tree t; +hashval_t hash; + }; + vecscc_entry sccstack; + +private: + struct sccs + { +unsigned int dfsnum; +unsigned int low; + }; + + static int scc_entry_compare (const void *, const void *); + + void DFS_write_tree_body (struct output_block *ob, + tree expr, sccs *expr_state, bool ref_p, + bool single_p); + + void DFS_write_tree (struct output_block *ob, sccs *from_state, + tree expr, bool ref_p, bool this_ref_p, + bool single_p); + hashval_t + hash_scc (struct output_block *ob, unsigned first, unsigned size); + + unsigned int next_dfs_num; + struct pointer_map_t *sccstate; + struct obstack sccstate_obstack; }; -struct scc_entry +DFS::DFS (struct output_block *ob, tree expr, bool ref_p, bool this_ref_p, + bool single_p) { - tree t; - hashval_t hash; -}; + sccstack.create (0); + sccstate = pointer_map_create (); + gcc_obstack_init (sccstate_obstack); + next_dfs_num = 1; + DFS_write_tree (ob, NULL, expr, ref_p, this_ref_p, single_p); +} -static unsigned int next_dfs_num; -static vecscc_entry sccstack; -static struct pointer_map_t *sccstate; -static struct obstack sccstate_obstack; - -static void -DFS_write_tree (struct output_block *ob, sccs *from_state, - tree expr, bool ref_p, bool this_ref_p); +DFS::~DFS () +{ + sccstack.release (); + pointer_map_destroy (sccstate); + obstack_free (sccstate_obstack, NULL); +} /* Handle the tree EXPR in the DFS walk with SCC state EXPR_STATE and DFS recurse for all tree edges originating from it. */ -static void -DFS_write_tree_body (struct output_block *ob, -tree expr, sccs *expr_state, bool ref_p) +void +DFS::DFS_write_tree_body (struct output_block *ob, + tree expr, sccs *expr_state, bool ref_p, + bool single_p) { #define DFS_follow_tree_edge(DEST) \ - DFS_write_tree (ob, expr_state, DEST, ref_p, ref_p) + DFS_write_tree (ob, expr_state, DEST, ref_p, ref_p, single_p) enum tree_code code; @@ -690,18 +725,26 @@ DFS_write_tree_body (struct output_block #undef DFS_follow_tree_edge } -/* Return a hash value for the tree T. */ +/* Return a hash value for the tree T. + CACHE holds hash values of trees outside current SCC. MAP, if non-NULL, + may hold hash values if trees inside current SCC. */ static hashval_t -hash_tree (struct streamer_tree_cache_d *cache, tree t) +hash_tree (struct streamer_tree_cache_d *cache, hash_maptree, hashval_t *map, tree t) { inchash hstate; #define visit(SIBLING) \ do { \ unsigned ix; \ -if (SIBLING streamer_tree_cache_lookup (cache, SIBLING, ix)) \ +if (!SIBLING) \ + hstate.add_int (0); \ +else if (streamer_tree_cache_lookup (cache, SIBLING, ix)) \ hstate.add_int (streamer_tree_cache_get_hash (cache, ix)); \ +else if (map) \ + hstate.add_int (*map-get (SIBLING)); \ +else \ + hstate.add_int (1); \ } while (0) /* Hash TS_BASE. */ @@ -905,23 +948,7 @@ hash_tree (struct streamer_tree_cache_d if (CODE_CONTAINS_STRUCT (code, TS_TYPED)) { - if (POINTER_TYPE_P (t)) - { - /* For pointers factor in the pointed-to type recursively as -we cannot recurse through only pointers. -??? We can generalize this by keeping track