Implements tree loop unroller using the infrastructure provided. gcc/ChangeLog:
2018-02-12 Kugan Vivekanandarajah <kug...@linaro.org> * Makefile.in (OBJS): Add tree-ssa-loop-unroll.o. * common.opt (ftree-loop-unroll): New option. * passes.def: Add pass_tree_loop_uroll * timevar.def (TV_TREE_LOOP_UNROLL): Add. * tree-pass.h (make_pass_tree_loop_unroll): Declare. * tree-ssa-loop-unroll.c: New file.
From 71baaf8393dd79a98b4c0216e56d87083caf0177 Mon Sep 17 00:00:00 2001 From: Kugan Vivekanandarajah <kugan.vivekanandara...@linaro.org> Date: Mon, 12 Feb 2018 10:44:00 +1100 Subject: [PATCH 2/4] tree-loop-unroller Change-Id: I58c25b5f2e796d4166af3ea4e50a0f4a3078b6c2 --- gcc/Makefile.in | 1 + gcc/common.opt | 4 + gcc/passes.def | 1 + gcc/timevar.def | 1 + gcc/tree-pass.h | 1 + gcc/tree-ssa-loop-unroll.c | 268 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 276 insertions(+) create mode 100644 gcc/tree-ssa-loop-unroll.c diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 374bf3e..de3c146 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1536,6 +1536,7 @@ OBJS = \ tree-ssa-loop-im.o \ tree-ssa-loop-ivcanon.o \ tree-ssa-loop-ivopts.o \ + tree-ssa-loop-unroll.o \ tree-ssa-loop-manip.o \ tree-ssa-loop-niter.o \ tree-ssa-loop-prefetch.o \ diff --git a/gcc/common.opt b/gcc/common.opt index b20a9aa..ea47b8c 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1770,6 +1770,10 @@ fivopts Common Report Var(flag_ivopts) Init(1) Optimization Optimize induction variables on trees. +ftree-loop-unroll +Common Report Var(flag_tree_loop_unroll) Init(1) Optimization +Perform loop unrolling in gimple. + fjump-tables Common Var(flag_jump_tables) Init(1) Optimization Use jump tables for sufficiently large switch statements. diff --git a/gcc/passes.def b/gcc/passes.def index 9802f08..57f7cc2 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -302,6 +302,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_predcom); NEXT_PASS (pass_complete_unroll); NEXT_PASS (pass_slp_vectorize); + NEXT_PASS (pass_tree_loop_unroll); NEXT_PASS (pass_loop_prefetch); /* Run IVOPTs after the last pass that uses data-reference analysis as that doesn't handle TARGET_MEM_REFs. */ diff --git a/gcc/timevar.def b/gcc/timevar.def index 91221ae..a6bb847 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -202,6 +202,7 @@ DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution") DEFTIMEVAR (TV_CHECK_DATA_DEPS , "tree check data dependences") DEFTIMEVAR (TV_TREE_PREFETCH , "tree prefetching") DEFTIMEVAR (TV_TREE_LOOP_IVOPTS , "tree iv optimization") +DEFTIMEVAR (TV_TREE_LOOP_UNROLL , "tree loop unroll") DEFTIMEVAR (TV_PREDCOM , "predictive commoning") DEFTIMEVAR (TV_TREE_CH , "tree copy headers") DEFTIMEVAR (TV_TREE_SSA_UNCPROP , "tree SSA uncprop") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 93a6a99..2c0740f 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -388,6 +388,7 @@ extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt); extern gimple_opt_pass *make_pass_parallelize_loops (gcc::context *ctxt); extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_tree_loop_unroll (gcc::context *ctxt); extern gimple_opt_pass *make_pass_tree_loop_done (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch (gcc::context *ctxt); extern gimple_opt_pass *make_pass_ch_vect (gcc::context *ctxt); diff --git a/gcc/tree-ssa-loop-unroll.c b/gcc/tree-ssa-loop-unroll.c new file mode 100644 index 0000000..04cf092 --- /dev/null +++ b/gcc/tree-ssa-loop-unroll.c @@ -0,0 +1,268 @@ + +/* Tree Loop Unroller. + Copyright (C) 2017 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +GCC is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "hash-set.h" +#include "machmode.h" +#include "tree.h" +#include "tree-pass.h" +#include "target.h" +#include "gimple.h" +#include "cfgloop.h" +#include "tree-ssa-loop.h" +#include "tree-scalar-evolution.h" +#include "tree-ssa-loop-manip.h" +#include "tree-ssa-loop-niter.h" +#include "tree-ssa-loop-ivopts.h" +#include "gimple.h" +#include "predict.h" +#include "cfghooks.h" +#include "tree-inline.h" +#include "gimple-iterator.h" +#include "fold-const.h" +#include "tree-data-ref.h" +#include "params.h" + +/* Count the load memory streams in the LOOP. If the streams are larger + (compared to MAX_LOAD_STREAMS), we dont need to compute all of + them. This is used to limit the partial unrolling factor to avoid + too many memory load streams. */ + +static signed +count_mem_load_streams (struct loop *loop, signed max_load_streams) +{ + basic_block *bbs = get_loop_body (loop); + unsigned nbbs = loop->num_nodes; + gimple_stmt_iterator gsi; + signed count = 0; + hash_set <data_reference_p> *dr_set = new hash_set<data_reference_p> (); + vec<data_reference_p> datarefs_vec; + datarefs_vec.create (20); + unsigned k; + + for (unsigned i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + tree access_fn; + vec<tree> access_fns; + datarefs_vec.truncate (0); + + if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec)) + continue; + + for (unsigned j = 0; j < datarefs_vec.length (); ++j) + { + data_reference_p dr = datarefs_vec[j]; + if (!DR_IS_READ (dr)) + continue; + access_fns = DR_ACCESS_FNS (dr); + + FOR_EACH_VEC_ELT (access_fns, k, access_fn) + { + if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) + { + if (!dr_set->add (dr)) + count++; + break; + } + } + } + + if (count > max_load_streams / 2) + break; + } + } + free_data_refs (datarefs_vec); + free (dr_set); + return count; +} + +/* Verify that loop in a form that is suitable for unrolling. */ + +static bool +loop_form_ok_p (struct loop *loop) +{ + if (!single_exit (loop)) + return false; + + /* Loop has preheader. */ + if (!loop_preheader_edge (loop)) + return false; + + return true; +} + +/* Return false if the cost model does not prefer LOOP to be unrolled. + Return true and update the DESC if we decided that the LOOP + is to be unrolled. Also, in this case, determine the factor (NUNROLL) + by which the LOOP should be unrolled. */ + +static bool +decide_unroll_iterations (struct loop *loop, + struct tree_niter_desc *desc, + signed *nunroll) +{ + /* nunroll = total number of copies of the original loop body in + unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ + signed ninsns = tree_num_loop_insns (loop, &eni_size_weights); + signed ninsns_outer = tree_num_loop_insns (loop_outer (loop), + &eni_size_weights); + signed n_unroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns; + signed load_streams = 0; + signed max_load_streams = -1; + signed n_unroll2; + + if (ninsns_outer >= PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS)) + return false; + ninsns_outer -= ninsns; + n_unroll2 = (PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) - ninsns_outer)/ ninsns; + if (n_unroll2 < n_unroll) + n_unroll = n_unroll2; + + if (n_unroll > PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) + n_unroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); + + /* Check for simple loops. */ + edge exit = single_exit (loop); + if (!number_of_iterations_exit (loop, exit, + desc, false, false) + || !integer_onep (desc->assumptions) + || !integer_zerop (desc->may_be_zero) + || !desc->niter) + return false; + + /* Now force nunroll to be power of 2. */ + n_unroll = 1 << (floor_log2 (n_unroll)); + if (targetm.hw_max_mem_read_streams + && (max_load_streams = targetm.hw_max_mem_read_streams ()) > 0) + { + load_streams = count_mem_load_streams (loop, max_load_streams); + if (load_streams > 0) + { + signed t = 1 << (floor_log2 (max_load_streams / load_streams)); + if (t < n_unroll) + n_unroll = t; + } + } + + if (!can_unroll_loop_p (loop, n_unroll, desc)) + return false; + + *nunroll = n_unroll; + return true; +} + +/* Unroll LOOP based on the cost model. */ + +static bool +unroll_loop (struct loop *loop) +{ + struct tree_niter_desc desc; + signed n_unroll; + + if (!decide_unroll_iterations (loop, &desc, &n_unroll)) + return false; + if (n_unroll <= 1) + return false; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nunrollong loop=%d times=%d\n", loop->num, n_unroll); + initialize_original_copy_tables (); + tree_unroll_loop (loop, n_unroll, + single_dom_exit (loop), &desc); + free_original_copy_tables (); + return true; +} + +static unsigned +execute_tree_loop_unroll () +{ + struct loop *loop; + bool changed = false; + int todo_flags = 0; + if (optimize_function_for_size_p (cfun)) + return 0; + + estimate_numbers_of_iterations (cfun); + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) + { + if (!loop_form_ok_p (loop)) + continue; + if (unroll_loop (loop)) + { + changed |= true; + free_numbers_of_iterations_estimates (loop); + } + } + + if (changed) + { + scev_reset (); + todo_flags |= TODO_cleanup_cfg; + } + + return todo_flags; +} + +namespace { +const pass_data pass_data_tree_loop_unroll = +{ + GIMPLE_PASS, /* type */ + "tree-loop-unroll", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_LOOP_UNROLL, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + (TODO_update_ssa | TODO_verify_all), +}; + +class pass_tree_loop_unroll : public gimple_opt_pass +{ +public: + pass_tree_loop_unroll (gcc::context *ctxt) + : gimple_opt_pass (pass_data_tree_loop_unroll, ctxt) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_tree_loop_unroll (m_ctxt); } + virtual bool gate (function *) { return flag_tree_loop_unroll != 0; } + virtual unsigned int execute (function *) + { + return execute_tree_loop_unroll (); + } + +}; // class pass_tree_loop_unroll + +} // anon namespace + +gimple_opt_pass * +make_pass_tree_loop_unroll (gcc::context *ctxt) +{ + return new pass_tree_loop_unroll (ctxt); +} + -- 2.7.4