On Sun, 25 Feb 2024, Tamar Christina wrote:

> Hi All,
> 
> The testcase shows an interesting case where we have multiple loops sharing a
> live value and have an early exit that go to the same location.  The 
> additional
> complication is that on x86_64 with -mavx we seem to also do prologue peeling
> on the loops.
> 
> We correctly identify which BB we need their dominators updated for, but we do
> so too early.
> 
> Instead of adding more dominator update we can solve this by for the cases 
> with
> multiple exits not to verify dominators at the end of peeling if peeling for
> vectorization.
> 
> We can then perform the final dominator updates just before vectorization when
> all loop transformations are done.

What's the actual CFG transform that happens between the old and the new
place?  I see a possible edge splitting but where is the one that makes
this patch work?

> This also means we reduce the number of dominator updates needed by at least
> 50% and fixes the ICE.
> 
> Bootstrapped Regtested on aarch64-none-linux-gnu and
> x86_64-pc-linux-gnu no issues.
> 
> Ok for master?
> 
> Thanks,
> Tamar
> 
> gcc/ChangeLog:
> 
>       PR tree-optimization/114081
>       PR tree-optimization/113290
>       * tree-vect-loop-manip.cc (slpeel_tree_duplicate_loop_to_edge_cfg):
>       Skip dominator update when multiple exit.
>       (vect_do_peeling): Remove multiple exit dominator update.
>       * tree-vect-loop.cc (vect_transform_loop): Update dominators when
>       multiple exits.
>       * tree-vectorizer.h (LOOP_VINFO_DOMS_NEED_UPDATE,
>                            dominators_needing_update): New.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR tree-optimization/114081
>       PR tree-optimization/113290
>       * gcc.dg/vect/vect-early-break_120-pr114081.c: New test.
>       * gcc.dg/vect/vect-early-break_121-pr114081.c: New test.
> 
> --- inline copy of patch -- 
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..2cd4ce1e4ac573ba6e41730fd2216f0ec8061376
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_120-pr114081.c
> @@ -0,0 +1,38 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +typedef struct filter_list_entry {
> +  const char *name;
> +  int id;
> +  void (*function)();
> +} filter_list_entry;
> +
> +static const filter_list_entry filter_list[9] = {0};
> +
> +void php_zval_filter(int filter, int id1) {
> +  filter_list_entry filter_func;
> +
> +  int size = 9;
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == filter) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +
> +#pragma GCC novector
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == 0x0204) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +done:
> +  if (!filter_func.id)
> +    filter_func.function();
> +}
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c 
> b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> new file mode 100644
> index 
> 0000000000000000000000000000000000000000..feebdb7a6c9b8981d7be31dd1c741f9e36738515
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_121-pr114081.c
> @@ -0,0 +1,37 @@
> +/* { dg-do compile } */
> +/* { dg-add-options vect_early_break } */
> +/* { dg-require-effective-target vect_early_break } */
> +/* { dg-require-effective-target vect_int } */
> +/* { dg-additional-options "-O3" } */
> +
> +/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
> +
> +typedef struct filter_list_entry {
> +  const char *name;
> +  int id;
> +  void (*function)();
> +} filter_list_entry;
> +
> +static const filter_list_entry filter_list[9] = {0};
> +
> +void php_zval_filter(int filter, int id1) {
> +  filter_list_entry filter_func;
> +
> +  int size = 9;
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == filter) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +
> +  for (int i = 0; i < size; ++i) {
> +    if (filter_list[i].id == 0x0204) {
> +      filter_func = filter_list[i];
> +      goto done;
> +    }
> +  }
> +done:
> +  if (!filter_func.id)
> +    filter_func.function();
> +}
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index 
> 3f974d6d839e32516ae316f28ca25316e43d7d86..b5e158bc5cfb5107d5ff461e489d306f81e090d0
>  100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1917,7 +1917,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
> *loop, edge loop_exit,
>         doms.safe_push (e->dest);
>       }
>  
> -      iterate_fix_dominators (CDI_DOMINATORS, doms, false);
>        if (updated_doms)
>       updated_doms->safe_splice (doms);
>      }
> @@ -1925,7 +1924,9 @@ slpeel_tree_duplicate_loop_to_edge_cfg (class loop 
> *loop, edge loop_exit,
>    free (new_bbs);
>    free (bbs);
>  
> -  checking_verify_dominators (CDI_DOMINATORS);
> +  /* If we're peeling for vectorization then delay verifying dominators.  */
> +  if (!flow_loops || !multiple_exits_p)
> +    checking_verify_dominators (CDI_DOMINATORS);
>  
>    return new_loop;
>  }
> @@ -3404,7 +3405,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, 
> tree nitersm1,
>        epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop;
>        edge epilog_e = vect_epilogues ? e : scalar_e;
>        edge new_epilog_e = NULL;
> -      auto_vec<basic_block> doms;
> +      auto_vec<basic_block>& doms = LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo);
>        epilog
>       = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e, epilog, epilog_e, e,
>                                                 &new_epilog_e, true, &doms);
> @@ -3554,10 +3555,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree 
> niters, tree nitersm1,
>         scale_loop_profile (epilog, prob_epilog, -1);
>       }
>  
> -      /* Recalculate the dominators after adding the guard edge.  */
> -      if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> -     iterate_fix_dominators (CDI_DOMINATORS, doms, false);
> -
>        /* When we do not have a loop-around edge to the epilog we know
>        the vector loop covered at least VF scalar iterations unless
>        we have early breaks.
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index 
> 35f1f8c7d4245135ace7ffff40ff9be548919587..ab19ad6a6be516e3ee1f0fbeaaeeffeae1fb900f
>  100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -11987,7 +11987,12 @@ vect_transform_loop (loop_vec_info loop_vinfo, 
> gimple *loop_vectorized_call)
>    /* Handle any code motion that we need to for early-break vectorization 
> after
>       we've done peeling but just before we start vectorizing.  */
>    if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo))
> -    move_early_exit_stmts (loop_vinfo);
> +    {
> +      /* Recalculate the dominators.  */
> +      iterate_fix_dominators (CDI_DOMINATORS,
> +                           LOOP_VINFO_DOMS_NEED_UPDATE (loop_vinfo), false);
> +      move_early_exit_stmts (loop_vinfo);
> +    }
>  
>    /* Schedule the SLP instances first, then handle loop vectorization
>       below.  */
> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> index 
> db44d730b7026492c4dd8c394ab505f227edfc8e..6bb5b6d69a8340c0b68cbcbbb110c0380b7d73aa
>  100644
> --- a/gcc/tree-vectorizer.h
> +++ b/gcc/tree-vectorizer.h
> @@ -961,6 +961,10 @@ public:
>    /* Statements whose VUSES need updating if early break vectorization is to
>       happen.  */
>    auto_vec<gimple*> early_break_vuses;
> +
> +  /* Dominators that need to be recalculated that have been deferred until 
> after
> +     all peeling.  */
> +  auto_vec<basic_block> dominators_needing_update;
>  } *loop_vec_info;
>  
>  /* Access Functions.  */
> @@ -1021,6 +1025,7 @@ public:
>    (single_pred ((L)->loop->latch) != (L)->vec_loop_iv_exit->src)
>  #define LOOP_VINFO_EARLY_BRK_DEST_BB(L)    (L)->early_break_dest_bb
>  #define LOOP_VINFO_EARLY_BRK_VUSES(L)      (L)->early_break_vuses
> +#define LOOP_VINFO_DOMS_NEED_UPDATE(L)     (L)->dominators_needing_update
>  #define LOOP_VINFO_LOOP_CONDS(L)           (L)->conds
>  #define LOOP_VINFO_LOOP_IV_COND(L)         (L)->loop_iv_cond
>  #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies
> 
> 
> 
> 
> 

-- 
Richard Biener <rguent...@suse.de>
SUSE Software Solutions Germany GmbH,
Frankenstrasse 146, 90461 Nuernberg, Germany;
GF: Ivo Totev, Andrew McDonald, Werner Knoblich; (HRB 36809, AG Nuernberg)

Reply via email to