On Thu, 20 Oct 2011, Jan Hubicka wrote: > Hi, > this patch makes -fno-toplevel-reorder to work better with WHOPR. > The functions and variables comes out in proper order that is needed for Linux > kernel to currently boot with LTO because linker order is important there for > kernel's initialization code. > I also used this code when comparing various code layout algorithms - the > default layout is not as bad as one might think in most cases. > > The implementation is generally simple - lto_balanced_map already works on > fixed order of functions. It however grabs variables to first partition that > reffers to them and if none is found, they are all homed in the last > partition. > > This needs to be changed and variables needs to be inserted in order when > corresponding function is inserted, this is reason for lto_balanced_map > changes. > > Also we sort partitions by size in lto_wpa_write_files to make parallel make > finish faster. This would mix the linker order and needs to be disbaled. > We could of course output separate linker and makefile order, but I did't > bother > to do so. > > Also the patch won't output toplevel asm statements correctly - these are > still homed in first partition. I can look into this incrementally. > However to make this useful, we probably ought to prevent lto_balanced_map > to break up partitions in the middle of asm file. > > This is not needed for kernel, so I deffer it for later time. > > Unfortunately the patch doesn't make kernel to build since we hit quite > involved bug in partitioning and variable promotion. I am working on fix but > it > will take me bit time. > Well, extra stress on bugs in partitioning is another reason for this patch > to be interesting. > > Bootstrapped/regtested x86_64-linux, OK?
Ok. Thanks, Richard. > Honza > > * lto/lto.c (node_cmp, varpool_node_cmp): New functions. > (lto_balanced_map): Honnor -fno-toplevel-reorder of vars&functions. > (cmp_partitions): Rename to ... > (cmp_partitions_size): ... this one. > (cmp_partitions_order): New function. > (lto_wpa_write_files): Sort partitions by order when > -fno-toplevel-reorder is used. > Index: lto/lto.c > =================================================================== > --- lto/lto.c (revision 180181) > +++ lto/lto.c (working copy) > @@ -1665,6 +1673,23 @@ lto_1_to_1_map (void) > ltrans_partitions); > } > > +/* Helper function for qsort; sort nodes by order. */ > +static int > +node_cmp (const void *pa, const void *pb) > +{ > + const struct cgraph_node *a = *(const struct cgraph_node * const *) pa; > + const struct cgraph_node *b = *(const struct cgraph_node * const *) pb; > + return b->order - a->order; > +} > + > +/* Helper function for qsort; sort nodes by order. */ > +static int > +varpool_node_cmp (const void *pa, const void *pb) > +{ > + const struct varpool_node *a = *(const struct varpool_node * const *) pa; > + const struct varpool_node *b = *(const struct varpool_node * const *) pb; > + return b->order - a->order; > +} > > /* Group cgraph nodes into equally-sized partitions. > > @@ -1708,9 +1733,11 @@ static void > lto_balanced_map (void) > { > int n_nodes = 0; > + int n_varpool_nodes = 0, varpool_pos = 0; > struct cgraph_node **postorder = > XCNEWVEC (struct cgraph_node *, cgraph_n_nodes); > struct cgraph_node **order = XNEWVEC (struct cgraph_node *, > cgraph_max_uid); > + struct varpool_node **varpool_order = NULL; > int i, postorder_len; > struct cgraph_node *node; > int total_size = 0, best_total_size = 0; > @@ -1722,6 +1749,7 @@ lto_balanced_map (void) > int best_n_nodes = 0, best_n_varpool_nodes = 0, best_i = 0, best_cost = > INT_MAX, best_internal = 0; > int npartitions; > + int current_order = -1; > > for (vnode = varpool_nodes; vnode; vnode = vnode->next) > gcc_assert (!vnode->aux); > @@ -1731,6 +1759,7 @@ lto_balanced_map (void) > multiple partitions, this is just an estimate of real size. This is why > we keep partition_size updated after every partition is finalized. */ > postorder_len = ipa_reverse_postorder (postorder); > + > for (i = 0; i < postorder_len; i++) > { > node = postorder[i]; > @@ -1742,6 +1771,23 @@ lto_balanced_map (void) > } > free (postorder); > > + if (!flag_toplevel_reorder) > + { > + qsort (order, n_nodes, sizeof (struct cgraph_node *), node_cmp); > + > + for (vnode = varpool_nodes; vnode; vnode = vnode->next) > + if (partition_varpool_node_p (vnode)) > + n_varpool_nodes++; > + varpool_order = XNEWVEC (struct varpool_node *, n_varpool_nodes); > + > + n_varpool_nodes = 0; > + for (vnode = varpool_nodes; vnode; vnode = vnode->next) > + if (partition_varpool_node_p (vnode)) > + varpool_order[n_varpool_nodes++] = vnode; > + qsort (varpool_order, n_varpool_nodes, sizeof (struct varpool_node *), > + varpool_node_cmp); > + } > + > /* Compute partition size and create the first partition. */ > partition_size = total_size / PARAM_VALUE (PARAM_LTO_PARTITIONS); > if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE)) > @@ -1756,8 +1802,20 @@ lto_balanced_map (void) > { > if (order[i]->aux) > continue; > + > + current_order = order[i]->order; > + > + if (!flag_toplevel_reorder) > + while (varpool_pos < n_varpool_nodes && > varpool_order[varpool_pos]->order < current_order) > + { > + if (!varpool_order[varpool_pos]->aux) > + add_varpool_node_to_partition (partition, > varpool_order[varpool_pos]); > + varpool_pos++; > + } > + > add_cgraph_node_to_partition (partition, order[i]); > total_size -= inline_summary (order[i])->size; > + > > /* Once we added a new node to the partition, we also want to add > all referenced variables unless they was already added into some > @@ -1796,7 +1854,7 @@ lto_balanced_map (void) > > gcc_assert (node->analyzed); > > - /* Compute boundary cost of callgrpah edges. */ > + /* Compute boundary cost of callgraph edges. */ > for (edge = node->callees; edge; edge = edge->next_callee) > if (edge->callee->analyzed) > { > @@ -1848,7 +1906,8 @@ lto_balanced_map (void) > vnode = ipa_ref_varpool_node (ref); > if (!vnode->finalized) > continue; > - if (!vnode->aux && partition_varpool_node_p (vnode)) > + if (!vnode->aux && flag_toplevel_reorder > + && partition_varpool_node_p (vnode)) > add_varpool_node_to_partition (partition, vnode); > vsi = varpool_node_set_find (partition->varpool_set, vnode); > if (!vsi_end_p (vsi) > @@ -1878,7 +1937,8 @@ lto_balanced_map (void) > > vnode = ipa_ref_refering_varpool_node (ref); > gcc_assert (vnode->finalized); > - if (!vnode->aux && partition_varpool_node_p (vnode)) > + if (!vnode->aux && flag_toplevel_reorder > + && partition_varpool_node_p (vnode)) > add_varpool_node_to_partition (partition, vnode); > vsi = varpool_node_set_find (partition->varpool_set, vnode); > if (!vsi_end_p (vsi) > @@ -1967,9 +2027,22 @@ lto_balanced_map (void) > } > > /* Varables that are not reachable from the code go into last partition. > */ > - for (vnode = varpool_nodes; vnode; vnode = vnode->next) > - if (partition_varpool_node_p (vnode) && !vnode->aux) > - add_varpool_node_to_partition (partition, vnode); > + if (flag_toplevel_reorder) > + { > + for (vnode = varpool_nodes; vnode; vnode = vnode->next) > + if (partition_varpool_node_p (vnode) && !vnode->aux) > + add_varpool_node_to_partition (partition, vnode); > + } > + else > + { > + while (varpool_pos < n_varpool_nodes) > + { > + if (!varpool_order[varpool_pos]->aux) > + add_varpool_node_to_partition (partition, > varpool_order[varpool_pos]); > + varpool_pos++; > + } > + free (varpool_order); > + } > free (order); > } > > @@ -2134,7 +2205,7 @@ static lto_file *current_lto_file; > longest compilation being executed too late. */ > > static int > -cmp_partitions (const void *a, const void *b) > +cmp_partitions_size (const void *a, const void *b) > { > const struct ltrans_partition_def *pa > = *(struct ltrans_partition_def *const *)a; > @@ -2143,6 +2214,28 @@ cmp_partitions (const void *a, const voi > return pb->insns - pa->insns; > } > > +/* Helper for qsort; compare partitions and return one with smaller order. > */ > + > +static int > +cmp_partitions_order (const void *a, const void *b) > +{ > + const struct ltrans_partition_def *pa > + = *(struct ltrans_partition_def *const *)a; > + const struct ltrans_partition_def *pb > + = *(struct ltrans_partition_def *const *)b; > + int ordera = -1, orderb = -1; > + > + if (VEC_length (cgraph_node_ptr, pa->cgraph_set->nodes)) > + ordera = VEC_index (cgraph_node_ptr, pa->cgraph_set->nodes, 0)->order; > + else if (VEC_length (varpool_node_ptr, pa->varpool_set->nodes)) > + ordera = VEC_index (varpool_node_ptr, pa->varpool_set->nodes, 0)->order; > + if (VEC_length (cgraph_node_ptr, pb->cgraph_set->nodes)) > + orderb = VEC_index (cgraph_node_ptr, pb->cgraph_set->nodes, 0)->order; > + else if (VEC_length (varpool_node_ptr, pb->varpool_set->nodes)) > + orderb = VEC_index (varpool_node_ptr, pb->varpool_set->nodes, 0)->order; > + return orderb - ordera; > +} > + > /* Write all output files in WPA mode and the file with the list of > LTRANS units. */ > > @@ -2191,7 +2284,12 @@ lto_wpa_write_files (void) > blen = strlen (temp_filename); > > n_sets = VEC_length (ltrans_partition, ltrans_partitions); > - VEC_qsort (ltrans_partition, ltrans_partitions, cmp_partitions); > + > + /* Sort partitions by size so small ones are compiled last. > + FIXME: Even when not reordering we may want to output one list for > parallel make > + and other for final link command. */ > + VEC_qsort (ltrans_partition, ltrans_partitions, > + flag_toplevel_reorder ? cmp_partitions_size : cmp_partitions_order); > for (i = 0; i < n_sets; i++) > { > size_t len; > > -- Richard Guenther <rguent...@suse.de> SUSE / SUSE Labs SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746 GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer