> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> index b401f0817a3..042c03d819e 100644
> --- a/gcc/cgraphunit.c
> +++ b/gcc/cgraphunit.c
> @@ -1961,8 +1961,9 @@ expand_all_functions (void)
> }
>
> /* First output functions with time profile in specified order. */
> - qsort (tp_first_run_order, tp_first_run_order_pos,
> - sizeof (cgraph_node *), tp_first_run_node_cmp);
> + if (flag_profile_reproducible == PROFILE_REPRODUCIBILITY_SERIAL)
> + qsort (tp_first_run_order, tp_first_run_order_pos,
> + sizeof (cgraph_node *), tp_first_run_node_cmp);
You need to check this earlier, in
for (i = 0; i < order_pos; i++)
if (order[i]->process)
{
if (order[i]->tp_first_run
&& opt_for_fn (order[i]->decl, flag_profile_reorder_functions))
^^^^^ here
and check only for PROFILE_REPRODUCIBILITY_MULTITHREADED. We probably
also want to document this.
However, an easier fix is to simply clear tp_first_run at profile read
time if we do multithreaded reproducibility, instead of attaching it and
ignoring it later. This will make both places you modified do the right
thing.
Honza
> for (i = 0; i < tp_first_run_order_pos; i++)
> {
> node = tp_first_run_order[i];
> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
> index 15761ac9eb5..f9e632776e6 100644
> --- a/gcc/lto/lto-partition.c
> +++ b/gcc/lto/lto-partition.c
> @@ -509,7 +509,8 @@ lto_balanced_map (int n_lto_partitions, int
> max_partition_size)
> unit tends to import a lot of global trees defined there. We should
> get better about minimizing the function bounday, but until that
> things works smoother if we order in source order. */
> - order.qsort (tp_first_run_node_cmp);
> + if (flag_profile_reproducible == PROFILE_REPRODUCIBILITY_SERIAL)
> + order.qsort (tp_first_run_node_cmp);
> noreorder.qsort (node_cmp);
>
> if (dump_file)
> --
> 2.30.0
>