> > Ok, not streaming and comparing TREE_USED gets it improved to [...]
I will try to gather better data tomorrow. My Mozilla build died for lack of disk space, but according to the stats we are now at about 7GB of GGC memory after merging. I was playing with the following patch, which implements a test of whether types are the same under my (probably naive and wrong) understanding of the ODR rule in C++.
It prints type pairs that seem to be the same, and then verifies that they have the same names and are in the same namespaces and records. On Javascript there are 5000 types found to be the same by the devirtualization code this way that do not have the same MAIN VARIANT. I guess those trees may be a good starting point for you to look at why they are not merged. I suppose that once we have a maintainable code base we can get into more aggressive merging in special cases. Requiring trees to be exactly the same is a good default behaviour. We may, however, take advantage of extra knowledge. The FE may tag types/decls that are subject to the ODR rule, and for those we can reduce the hash to be based only on name+context, and we can even output sane diagnostics on mismatches. Similarly, I think it would help a lot if we proactively merged !can_prevail_p decls with matching types into those that can prevail, by hashing PUBLIC decls only by their assembler name. Merging those should subsequently allow collapsing the types that are otherwise kept separate just because the associated vtables have differences in EXTERNAL and PUBLIC flags on the methods and such.
Index: tree.c =================================================================== --- tree.c (revision 200064) +++ tree.c (working copy) @@ -11618,6 +11711,91 @@ lhd_gcc_personality (void) return gcc_eh_personality_decl; } +/* For languages with One Definition Rule, work out if + decls are actually the same even if the tree representation + differs. This handles only decls appearing in TYPE_NAME + and TYPE_CONTEXT. That is NAMESPACE_DECL, TYPE_DECL, + RECORD_TYPE and IDENTIFIER_NODE. 
*/ + +static bool +decls_same_for_odr (tree decl1, tree decl2) +{ + if (decl1 == decl2) + return true; + if (!decl1 || !decl2) + { + fprintf (stderr, "Nesting mismatch\n"); + debug_tree (decl1); + debug_tree (decl2); + return false; + } + if (TREE_CODE (decl1) != TREE_CODE (decl2)) + { + fprintf (stderr, "Code mismatch\n"); + debug_tree (decl1); + debug_tree (decl2); + return false; + } + if (TREE_CODE (decl1) == TRANSLATION_UNIT_DECL) + return true; + if (TREE_CODE (decl1) != NAMESPACE_DECL + && TREE_CODE (decl1) != RECORD_TYPE + && TREE_CODE (decl1) != TYPE_DECL) + { + fprintf (stderr, "Decl type mismatch\n"); + debug_tree (decl1); + return false; + } + if (!DECL_NAME (decl1)) + { + fprintf (stderr, "Anonymous; name mysmatch\n"); + debug_tree (decl1); + return false; + } + if (!decls_same_for_odr (DECL_NAME (decl1), DECL_NAME (decl2))) + return false; + return decls_same_for_odr (DECL_CONTEXT (decl1), + DECL_CONTEXT (decl2)); +} + +/* For languages with One Definition Rule, work out if + types are the same even if the tree representation differs. + This is non-trivial for LTO, where minor differences in + the type representation may have prevented type merging + from merging two copies of an otherwise equivalent type. */ + +static bool +types_same_for_odr (tree type1, tree type2) +{ + type1 = TYPE_MAIN_VARIANT (type1); + type2 = TYPE_MAIN_VARIANT (type2); + if (type1 == type2) + return true; + if (!type1 || !type2) + return false; + + /* If types are not structurally the same, do not bother to continue. + A match in the remainder of the code would mean an ODR violation. 
*/ + if (!types_compatible_p (type1, type2)) + return false; + + debug_tree (type1); + debug_tree (type2); + if (!TYPE_NAME (type1)) + { + fprintf (stderr, "Anonymous; name mysmatch\n"); + return false; + } + if (!decls_same_for_odr (TYPE_NAME (type1), TYPE_NAME (type2))) + return false; + if (!decls_same_for_odr (TYPE_CONTEXT (type1), TYPE_CONTEXT (type2))) + return false; + fprintf (stderr, "type match!\n"); + gcc_assert (in_lto_p); + + return true; +} + /* Try to find a base info of BINFO that would have its field decl at offset OFFSET within the BINFO type and which is of EXPECTED_TYPE. If it can be found, return, otherwise return NULL_TREE. */ @@ -11633,8 +11811,8 @@ get_binfo_at_offset (tree binfo, HOST_WI tree fld; int i; - if (TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (expected_type)) - return binfo; + if (types_same_for_odr (type, expected_type)) + return binfo; if (offset < 0) return NULL_TREE; @@ -11663,7 +11841,7 @@ get_binfo_at_offset (tree binfo, HOST_WI { tree base_binfo, found_binfo = NULL_TREE; for (i = 0; BINFO_BASE_ITERATE (binfo, i, base_binfo); i++) - if (TREE_TYPE (base_binfo) == TREE_TYPE (fld)) + if (types_same_for_odr (TREE_TYPE (base_binfo), TREE_TYPE (fld))) { found_binfo = base_binfo; break;