On 2021/12/13 16:57, Xionghu Luo via Gcc-patches wrote:
> 
> 
> On 2021/12/9 07:47, Jeff Law wrote:
>>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>>> index 3f6ad046623..33128061aab 100644
>>> --- a/gcc/tree-ssa-loop-split.c
>>> +++ b/gcc/tree-ssa-loop-split.c
>>>
>>> @@ -607,6 +610,38 @@ split_loop (class loop *loop1)
>>>       tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge
>>> (loop1));
>>>       patch_loop_exit (loop1, guard_stmt, guard_next, newend,
>>> initial_true);
>>>   +    update_ssa (TODO_update_ssa);
>>> +
>>> +    /* Proportion first loop's bb counts except those dominated by true
>>> +       branch to avoid drop 1s down.  */
>>> +    basic_block *bbs1, *bbs2;
>>> +    bbs1 = get_loop_body (loop1);
>>> +    unsigned j;
>>> +    for (j = 0; j < loop1->num_nodes; j++)
>>> +      if (bbs1[j] == loop1->latch
>>> +          || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
>>> +        bbs1[j]->count
>>> +          = bbs1[j]->count.apply_probability (true_edge->probability);
>>> +    free (bbs1);
>> It looks like there's two copies of this code in this patch, one in
>> split_loop and the other in do_split_loop_on_cond.  Would it make sense
>> to factor it out into its own little function?
>>
>>
>>> +
>>> +    /* Proportion second loop's bb counts except those dominated by
>>> false
>>> +       branch to avoid drop 1s down.  */
>>> +    basic_block bbi_copy = get_bb_copy (false_edge->dest);
>>> +    bbs2 = get_loop_body (loop2);
>>> +    for (j = 0; j < loop2->num_nodes; j++)
>>> +      if (bbs2[j] == loop2->latch
>>> +          || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
>>> +        bbs2[j]->count = bbs2[j]->count.apply_probability (
>>> +          true_edge->probability.invert ());
>>> +    free (bbs2);
>> Similarly for this block of code.
>>
>> If those can be reasonably factored out into two helper functions to be
>> called from split_loop and do_split_loop_on_cond, then this is OK with
>> the refactoring.
>>
>> jeff
> 
> 
> Thanks for the comments, updated as below.  Will commit this patchset and the
> approved patch for LIM if there are no objections:

This patch is committed to r12-6086.

> 
> 
> [PATCH v2 3/3] Fix loop split incorrect count and probability
> 
> In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
> kind of split. split_loop only works for single loop and insert edge at
> exit when split, while split_loop_on_cond is not limited to single loop
> and insert edge at latch when split.  Both split behavior should consider
> loop count and probability update.  For split_loop, loop split condition
> is moved in front of loop1 and loop2; But split_loop_on_cond moves the
> condition between loop1 and loop2, this patch does:
>  1) profile count proportion for both original loop and copied loop
> without dropping down the true branch's count;
>  2) probability update in the two loops and between the two loops.
> 
> Regression tested pass, OK for master?
> 
> Changes diff for split_loop and split_loop_on_cond cases:
> 
> 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
> ...
>    <bb 2> [local count: 118111600]:
>    _1 = end_6(D) - beg_7(D);
>    j_9 = _1 + beg2_8(D);
>    if (end_6(D) > beg_7(D))
>      goto <bb 14>; [89.00%]
>    else
>      goto <bb 6>; [11.00%]
> 
>    <bb 14> [local count: 105119324]:
>    if (j_9 >= c_11(D))
> -    goto <bb 15>; [100.00%]
> +    goto <bb 15>; [33.00%]
>    else
> -    goto <bb 16>; [100.00%]
> +    goto <bb 16>; [67.00%]
> 
> -  <bb 15> [local count: 105119324]:
> +  <bb 15> [local count: 34689377]:
>    _27 = end_6(D) + -1;
>    _28 = beg_7(D) - end_6(D);
>    _29 = j_9 + _28;
>    _30 = _29 + 1;
>    _31 = MAX_EXPR <c_11(D), _30>;
> 
> -  <bb 3> [local count: 955630225]:
> +  <bb 3> [local count: 315357973]:
>    # i_18 = PHI <i_13(8), end_6(D)(15)>
>    # j_19 = PHI <j_14(8), j_9(15)>
>    printf ("a: %d %d\n", i_18, j_19);
>    i_13 = i_18 + -1;
>    j_14 = j_19 + -1;
>    if (j_14 >= _31)
> -    goto <bb 8>; [89.00%]
> +    goto <bb 8>; [29.37%]
>    else
> -    goto <bb 17>; [11.00%]
> +    goto <bb 17>; [70.63%]
> 
> -  <bb 8> [local count: 850510901]:
> +  <bb 8> [local count: 280668596]:
>    goto <bb 3>; [100.00%]
> 
> -  <bb 16> [local count: 105119324]:
> +  <bb 16> [local count: 70429947]:
>    # i_24 = PHI <end_6(D)(14), i_32(17)>
>    # j_25 = PHI <j_9(14), j_33(17)>
> 
>    <bb 10> [local count: 955630225]:
>    # i_3 = PHI <i_24(16), i_22(13)>
>    # j_2 = PHI <j_25(16), j_23(13)>
>    i_22 = i_3 + -1;
>    j_23 = j_2 + -1;
>    if (beg_7(D) < i_22)
>      goto <bb 13>; [89.00%]
>    else
>      goto <bb 9>; [11.00%]
> 
> -  <bb 13> [local count: 850510901]:
> +  <bb 13> [local count: 569842305]:
>    goto <bb 10>; [100.00%]
> 
>    <bb 17> [local count: 105119324]:
>    # i_32 = PHI <i_13(3)>
>    # j_33 = PHI <j_14(3)>
>    if (beg_7(D) < i_32)
>      goto <bb 16>; [80.00%]
>    else
>      goto <bb 9>; [20.00%]
> 
>    <bb 9> [local count: 105119324]:
> 
>    <bb 6> [local count: 118111600]:
>    return 0;
> 
>  }
> 
> 2) diff base/loop-cond-split-1.c.151t.lsplit  
> patched/loop-cond-split-1.c.151t.lsplit:
> ...
>    <bb 2> [local count: 118111600]:
>    if (n_7(D) > 0)
>      goto <bb 4>; [89.00%]
>    else
>      goto <bb 3>; [11.00%]
> 
>    <bb 3> [local count: 118111600]:
>    return;
> 
>    <bb 4> [local count: 105119324]:
>    pretmp_3 = ga;
> 
> -  <bb 5> [local count: 955630225]:
> +  <bb 5> [local count: 315357973]:
>    # i_13 = PHI <i_10(20), 0(4)>
>    # prephitmp_12 = PHI <prephitmp_5(20), pretmp_3(4)>
>    if (prephitmp_12 != 0)
>      goto <bb 6>; [33.00%]
>    else
>      goto <bb 7>; [67.00%]
> 
>    <bb 6> [local count: 315357972]:
>    _2 = do_something ();
>    ga = _2;
> 
> -  <bb 7> [local count: 955630225]:
> +  <bb 7> [local count: 315357973]:
>    # prephitmp_5 = PHI <prephitmp_12(5), _2(6)>
>    i_10 = inc (i_13);
>    if (n_7(D) > i_10)
>      goto <bb 21>; [89.00%]
>    else
>      goto <bb 11>; [11.00%]
> 
>    <bb 11> [local count: 105119324]:
>    goto <bb 3>; [100.00%]
> 
> -  <bb 21> [local count: 850510901]:
> +  <bb 21> [local count: 280668596]:
>    if (prephitmp_12 != 0)
> -    goto <bb 20>; [100.00%]
> +    goto <bb 20>; [33.00%]
>    else
> -    goto <bb 19>; [INV]
> +    goto <bb 19>; [67.00%]
> 
> -  <bb 20> [local count: 850510901]:
> +  <bb 20> [local count: 280668596]:
>    goto <bb 5>; [100.00%]
> 
> -  <bb 19> [count: 0]:
> +  <bb 19> [local count: 70429947]:
>    # i_23 = PHI <i_10(21)>
>    # prephitmp_25 = PHI <prephitmp_5(21)>
> 
> -  <bb 12> [local count: 955630225]:
> +  <bb 12> [local count: 640272252]:
>    # i_15 = PHI <i_23(19), i_22(16)>
>    # prephitmp_16 = PHI <prephitmp_25(19), prephitmp_16(16)>
>    i_22 = inc (i_15);
>    if (n_7(D) > i_22)
>      goto <bb 16>; [89.00%]
>    else
>      goto <bb 11>; [11.00%]
> 
> -  <bb 16> [local count: 850510901]:
> +  <bb 16> [local count: 569842305]:
>    goto <bb 12>; [100.00%]
>  }
> 
> gcc/ChangeLog:
> 
>       * tree-ssa-loop-split.c (Fix_loop_bb_probability): New function.
>       (split_loop): Fix incorrect profile_count and probability.
>       (do_split_loop_on_cond): Likewise.
> ---
>  gcc/tree-ssa-loop-split.c | 72 ++++++++++++++++++++++++++++++++++-----
>  1 file changed, 64 insertions(+), 8 deletions(-)
> 
> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 3f6ad046623..55011aeed97 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> @@ -484,6 +484,39 @@ compute_new_first_bound (gimple_seq *stmts, class 
> tree_niter_desc *niter,
>    return newend;
>  }
> 
> +/* Fix the two loop's bb count after split based on the split edge 
> probability,
> +   don't adjust the bbs dominated by true branches of that loop to avoid
> +   dropping 1s down.  */
> +static void
> +fix_loop_bb_probability (class loop *loop1, class loop *loop2, edge 
> true_edge,
> +                      edge false_edge)
> +{
> +  update_ssa (TODO_update_ssa);
> +
> +  /* Proportion first loop's bb counts except those dominated by true
> +     branch to avoid drop 1s down.  */
> +  basic_block *bbs1, *bbs2;
> +  bbs1 = get_loop_body (loop1);
> +  unsigned j;
> +  for (j = 0; j < loop1->num_nodes; j++)
> +    if (bbs1[j] == loop1->latch
> +     || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
> +      bbs1[j]->count
> +     = bbs1[j]->count.apply_probability (true_edge->probability);
> +  free (bbs1);
> +
> +  /* Proportion second loop's bb counts except those dominated by false
> +     branch to avoid drop 1s down.  */
> +  basic_block bbi_copy = get_bb_copy (false_edge->dest);
> +  bbs2 = get_loop_body (loop2);
> +  for (j = 0; j < loop2->num_nodes; j++)
> +    if (bbs2[j] == loop2->latch
> +     || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
> +      bbs2[j]->count
> +     = bbs2[j]->count.apply_probability (true_edge->probability.invert ());
> +  free (bbs2);
> +}
> +
>  /* Checks if LOOP contains an conditional block whose condition
>     depends on which side in the iteration space it is, and if so
>     splits the iteration space into two loops.  Returns true if the
> @@ -575,7 +608,10 @@ split_loop (class loop *loop1)
>                                           stmts2);
>       tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
>       if (!initial_true)
> -       cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
> +       cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond);
> +
> +     edge true_edge, false_edge;
> +     extract_true_false_edges_from_block (bbs[i], &true_edge, &false_edge);
> 
>       /* Now version the loop, placing loop2 after loop1 connecting
>          them, and fix up SSA form for that.  */
> @@ -583,11 +619,11 @@ split_loop (class loop *loop1)
>       basic_block cond_bb;
> 
>       class loop *loop2 = loop_version (loop1, cond, &cond_bb,
> -                                        profile_probability::always (),
> -                                        profile_probability::always (),
> -                                        profile_probability::always (),
> -                                        profile_probability::always (),
> -                                        true);
> +                                       true_edge->probability,
> +                                       true_edge->probability.invert (),
> +                                       profile_probability::always (),
> +                                       profile_probability::always (),
> +                                       true);
>       gcc_assert (loop2);
> 
>       edge new_e = connect_loops (loop1, loop2);
> @@ -607,6 +643,15 @@ split_loop (class loop *loop1)
>       tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
>       patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
> 
> +     fix_loop_bb_probability (loop1, loop2, true_edge, false_edge);
> +
> +     /* Fix first loop's exit probability after scaling.  */
> +     edge exit_to_latch1 = single_pred_edge (loop1->latch);
> +     exit_to_latch1->probability = exit_to_latch1->probability.apply_scale (
> +       true_edge->probability.to_reg_br_prob_base (), REG_BR_PROB_BASE);
> +     single_exit (loop1)->probability
> +       = exit_to_latch1->probability.invert ();
> +
>       /* Finally patch out the two copies of the condition to be always
>          true/false (or opposite).  */
>       gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
> @@ -1486,8 +1531,8 @@ do_split_loop_on_cond (struct loop *loop1, edge 
> invar_branch)
>    initialize_original_copy_tables ();
> 
>    struct loop *loop2 = loop_version (loop1, boolean_true_node, NULL,
> -                                  profile_probability::always (),
> -                                  profile_probability::never (),
> +                                  invar_branch->probability.invert (),
> +                                  invar_branch->probability,
>                                    profile_probability::always (),
>                                    profile_probability::always (),
>                                    true);
> @@ -1535,6 +1580,17 @@ do_split_loop_on_cond (struct loop *loop1, edge 
> invar_branch)
>       between loop1 and loop2.  */
>    connect_loop_phis (loop1, loop2, to_loop2);
> 
> +  edge true_edge, false_edge, skip_edge1, skip_edge2;
> +  extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
> +
> +  skip_edge1 = true_invar ? false_edge : true_edge;
> +  skip_edge2 = true_invar ? true_edge : false_edge;
> +  fix_loop_bb_probability (loop1, loop2, skip_edge1, skip_edge2);
> +
> +  /* Fix first loop's exit probability after scaling.  */
> +  to_loop1->probability = invar_branch->probability.invert ();
> +  to_loop2->probability = invar_branch->probability;
> +
>    free_original_copy_tables ();
> 
>    return true;

-- 
Thanks,
Xionghu

Reply via email to