I wonder if you can factor out generic part into GIMPLE and leave
target hook as specific as possible?

On Tue, May 14, 2019 at 11:10 AM <li...@linux.ibm.com> wrote:
>
> From: Kewen Lin <li...@linux.ibm.com>
>
> Previous version link for background:
> https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00912.html
>
> This hook is to predict whether one loop in gimple will
> be transformed to low-overhead loop later in RTL, and
> designed to be called in ivopts.
>
> "Since the low-overhead loop optimize transformation is
> based on RTL, some of those checks are hard to be imitated
> on gimple, so it's not possible to predict the current
> loop will be transformed exactly in middle-end.  But if we
> can have most loop predicted precisely, it would be helpful.
> It highly depends on target hook fine tuning. It's
> acceptable to have some loops which can be transformed to
> low-overhead loop but we don't catch.  But we should try
> our best to avoid to predict some loop as low-overhead loop
> but which isn't."
>
> Bootstrapped and regression testing passed on powerpc64le.
>
> Is it ok for trunk?
>
> gcc/ChangeLog
>
> 2019-05-13  Kewen Lin  <li...@gcc.gnu.org>
>
>         PR middle-end/80791
>         * target.def (predict_doloop_p): New hook.
>         * targhooks.h (default_predict_doloop_p): New declaration.
>         * targhooks.c (default_predict_doloop_p): New function.
>         * doc/tm.texi.in (TARGET_PREDICT_DOLOOP_P): New hook.
>         * doc/tm.texi: Regenerate.
>         * config/rs6000/rs6000.c (invalid_insn_for_doloop_p): New function.
>         (costly_iter_for_doloop_p): Likewise.
>         (rs6000_predict_doloop_p): Likewise.
>         (TARGET_PREDICT_DOLOOP_P): New macro.
>
> ---
>  gcc/config/rs6000/rs6000.c | 174 
> ++++++++++++++++++++++++++++++++++++++++++++-
>  gcc/doc/tm.texi            |   8 +++
>  gcc/doc/tm.texi.in         |   2 +
>  gcc/target.def             |   9 +++
>  gcc/targhooks.c            |  13 ++++
>  gcc/targhooks.h            |   1 +
>  6 files changed, 206 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index a21f4f7..1c1c8eb 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -83,6 +83,9 @@
>  #include "tree-ssa-propagate.h"
>  #include "tree-vrp.h"
>  #include "tree-ssanames.h"
> +#include "tree-ssa-loop-niter.h"
> +#include "tree-cfg.h"
> +#include "tree-scalar-evolution.h"
>
>  /* This file should be included last.  */
>  #include "target-def.h"
> @@ -1914,6 +1917,9 @@ static const struct attribute_spec 
> rs6000_attribute_table[] =
>  #undef TARGET_CAN_USE_DOLOOP_P
>  #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
>
> +#undef TARGET_PREDICT_DOLOOP_P
> +#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
> +
>  #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
>  #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
>
> @@ -39436,7 +39442,173 @@ rs6000_mangle_decl_assembler_name (tree decl, tree 
> id)
>    return id;
>  }
>
> -
> +/* Check whether there are some instructions preventing doloop transformation
> +   inside loop body, mainly for instructions which are possible to kill CTR.
> +
> +   Return true if some invalid insn exits, otherwise return false.  */
> +
> +static bool
> +invalid_insn_for_doloop_p (struct loop *loop)
> +{
> +  basic_block *body = get_loop_body (loop);
> +  gimple_stmt_iterator gsi;
> +
> +  for (unsigned i = 0; i < loop->num_nodes; i++)
> +    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
The loops on bbs/stmts seem general and can be put in GIMPLE.  So a
target hook taking gimple_stmt parameter and returning if the stmt
blocks doloop is enough?

> +      {
> +       gimple *stmt = gsi_stmt (gsi);
> +       if (is_gimple_call (stmt) && !gimple_call_internal_p (stmt)
> +           && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
> +         {
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             fprintf (dump_file,
> +                      "predict doloop failure due to finding call.\n");
> +           return true;
> +         }
> +       if (computed_goto_p (stmt))
> +         {
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             fprintf (dump_file, "predict doloop failure due to"
> +                                 "finding computed jump.\n");
> +           return true;
> +         }
> +       /* TODO: Now this hook is expected to be called in ivopts, which is
> +          before switchlower1/switchlower2.  Checking for SWITCH at this 
> point
> +       will eliminate some good candidates.  But since there are only a few
> +       cases having _a_ switch statement in the innermost loop, it can be a 
> low
> +          priority enhancement.  */
> +
> +       if (gimple_code (stmt) == GIMPLE_SWITCH)
> +         {
> +           if (dump_file && (dump_flags & TDF_DETAILS))
> +             fprintf (dump_file,
> +                      "predict doloop failure due to finding switch.\n");
> +           return true;
> +         }
> +      }
> +
> +  return false;
> +}
> +
> +/* Check whether number of iteration computation is too costly for doloop
> +   transformation.  It expands the gimple sequence to equivalent RTL insn
> +   sequence, then evaluate the cost.
> +
> +   Return true if it's costly, otherwise return false.  */
> +
> +static bool
> +costly_iter_for_doloop_p (struct loop *loop, tree niters)
> +{
> +  tree type = TREE_TYPE (niters);
> +  unsigned cost = 0, i;
> +  tree obj;
> +  bool speed = optimize_loop_for_speed_p (loop);
> +  int regno = LAST_VIRTUAL_REGISTER + 1;
> +  vec<tree> tvec;
> +  tvec.create (20);
> +  decl_rtl_data tdata;
> +  tdata.regno = &regno;
> +  tdata.treevec = &tvec;
> +  walk_tree (&niters, prepare_decl_rtl, &tdata, NULL);
> +  start_sequence ();
> +  expand_expr (niters, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
> +  rtx_insn *seq = get_insns ();
> +  end_sequence ();
> +  FOR_EACH_VEC_ELT (tvec, i, obj)
> +    SET_DECL_RTL (obj, NULL_RTX);
> +  tvec.release ();
> +
> +  for (; seq; seq = NEXT_INSN (seq))
> +    {
> +      if (!INSN_P (seq))
> +       continue;
> +      rtx body = PATTERN (seq);
> +      if (GET_CODE (body) == SET)
> +       {
> +         rtx set_val = XEXP (body, 1);
> +         enum rtx_code code = GET_CODE (set_val);
> +         enum rtx_class cls = GET_RTX_CLASS (code);
> +         /* For now, we only consider these two RTX classes, to match what we
> +        get in doloop_optimize, excluding operations like zero/sign extend.  
> */
> +         if (cls == RTX_BIN_ARITH || cls == RTX_COMM_ARITH)
> +           cost += set_src_cost (set_val, GET_MODE (set_val), speed);
> +       }
> +    }
> +  unsigned max_cost
> +    = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
> +  if (cost > max_cost)
> +    return true;
> +
> +  return false;
> +}
Though this function works on RTL, it's generic too, especially the
code is already in ivopts (and moved by your previous patch).
> +
> +/*
> +   Predict whether the given loop will be transformed in the RTL
> +   doloop_optimize pass.  This is for use by the IVOPTS middle-end pass.
> +   Attempt to duplicate as many doloop_optimize checks as possible.
> +
> +   Some RTL specific checks seems unable to be checked here, if any
> +   new checks or easy checks _are_ missing here.  */
> +
> +static bool
> +rs6000_predict_doloop_p (struct loop *loop)
> +{
> +  gcc_assert (loop);
> +
> +  /* On rs6000, targetm.can_use_doloop_p is actually
> +     can_use_doloop_if_innermost.  Just ensure it's innermost.  */
> +  if (loop->inner != NULL)
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file, "predict doloop failure due to"
> +                           "no innermost.\n");
> +      return false;
> +    }
> +
> +  /* number_of_latch_executions is not so costly, so we don't use
> +     number_of_iterations_exit for iteration description.  */
> +  tree niters = number_of_latch_executions (loop);
> +  if (niters == NULL_TREE || niters == chrec_dont_know)
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file, "predict doloop failure due to"
> +                           "unexpected niters.\n");
> +      return false;
> +    }
The checks on niters are generic too.
> +
> +  /* Similar to doloop_optimize, check whether iteration count too small
> +     and not profitable.  */
> +  HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
> +  if (est_niter == -1)
> +    est_niter = get_likely_max_loop_iterations_int (loop);
> +  if (est_niter >= 0 && est_niter < 3)
The only probably target dependent is the magic number 3 here.  After
moving all generic part to ivopts, we can use a macro for the moment,
and improve it as a parameter if there are more doloop targets.
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file,
> +                "predict doloop failure due to"
> +                "too few iterations (%d).\n",
> +                (unsigned int) est_niter);
> +      return false;
> +    }
> +
> +  /* Similar to doloop_optimize, check whether number of iterations too 
> costly
> +     to compute.  */
> +  if (costly_iter_for_doloop_p (loop, niters))
> +    {
> +      if (dump_file && (dump_flags & TDF_DETAILS))
> +       fprintf (dump_file, "predict doloop failure due to"
> +                           "costly niter computation.\n");
> +      return false;
> +    }
> +
> +  /* Similar to doloop_optimize targetm.invalid_within_doloop, check for
> +     CALL, tablejump, computed_jump.  */
> +  if (invalid_insn_for_doloop_p (loop))
> +    return false;
> +
> +  return true;
> +}
> +
Overall most of above checks can be moved out of backend, leaving only
more specific target hook checking on gimple_stmt?  And even that can
be made generic to some extend.

Thanks,
bin
>  struct gcc_target targetm = TARGET_INITIALIZER;
>
>  #include "gt-rs6000.h"
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index 8c8978b..e595b8b 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11603,6 +11603,14 @@ function version at run-time for a given set of 
> function versions.
>  body must be generated.
>  @end deftypefn
>
> +@deftypefn {Target Hook} bool TARGET_PREDICT_DOLOOP_P (struct loop 
> *@var{loop})
> +Return true if we can predict it is possible to use low-overhead loops
> +for a particular loop.  The parameter @var{loop} is a pointer to the loop
> +which is going to be checked.  This target hook is required only when the
> +target supports low-overhead loops, and will help some earlier middle-end
> +passes to make some decisions.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int 
> @var{&iterations}, const widest_int @var{&iterations_max}, unsigned int 
> @var{loop_depth}, bool @var{entered_at_top})
>  Return true if it is possible to use low-overhead loops (@code{doloop_end}
>  and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index fe1194e..e047734 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -7942,6 +7942,8 @@ to by @var{ce_info}.
>
>  @hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
>
> +@hook TARGET_PREDICT_DOLOOP_P
> +
>  @hook TARGET_CAN_USE_DOLOOP_P
>
>  @hook TARGET_INVALID_WITHIN_DOLOOP
> diff --git a/gcc/target.def b/gcc/target.def
> index 66cee07..0a80de3 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4227,6 +4227,15 @@ DEFHOOK
>  rtx, (machine_mode mode, rtx result, rtx val, rtx failval),
>   default_speculation_safe_value)
>
> +DEFHOOK
> +(predict_doloop_p,
> + "Return true if we can predict it is possible to use low-overhead loops\n\
> +for a particular loop.  The parameter @var{loop} is a pointer to the loop\n\
> +which is going to be checked.  This target hook is required only when the\n\
> +target supports low-overhead loops, and will help some earlier middle-end\n\
> +passes to make some decisions.",
> + bool, (struct loop *loop),
> + default_predict_doloop_p)
>
>  DEFHOOK
>  (can_use_doloop_p,
> diff --git a/gcc/targhooks.c b/gcc/targhooks.c
> index 318f7e9..56ed4cc 100644
> --- a/gcc/targhooks.c
> +++ b/gcc/targhooks.c
> @@ -643,6 +643,19 @@ default_has_ifunc_p (void)
>    return HAVE_GNU_INDIRECT_FUNCTION;
>  }
>
> +/* True if we can predict this loop is possible to be transformed to a
> +   low-overhead loop, otherwise returns false.
> +
> +   By default, false is returned, as this hook's applicability should be
> +   verified for each target.  Target maintainers should re-define the hook
> +   if the target can take advantage of it.  */
> +
> +bool
> +default_predict_doloop_p (struct loop *loop ATTRIBUTE_UNUSED)
> +{
> +  return false;
> +}
> +
>  /* NULL if INSN insn is valid within a low-overhead loop, otherwise returns
>     an error message.
>
> diff --git a/gcc/targhooks.h b/gcc/targhooks.h
> index 5943627..70bfb17 100644
> --- a/gcc/targhooks.h
> +++ b/gcc/targhooks.h
> @@ -85,6 +85,7 @@ extern bool default_fixed_point_supported_p (void);
>
>  extern bool default_has_ifunc_p (void);
>
> +extern bool default_predict_doloop_p (struct loop *);
>  extern const char * default_invalid_within_doloop (const rtx_insn *);
>
>  extern tree default_builtin_vectorized_function (unsigned int, tree, tree);
> --
> 2.7.4
>

Reply via email to