On Tue, 2014-08-12 at 14:40 -0600, Jeff Law wrote: > On 08/12/14 14:23, Richard Biener wrote: > > On August 12, 2014 8:31:16 PM CEST, Jeff Law <l...@redhat.com> wrote: > >> Try setting the header & latch fields for the loop structure to NULL, > >> then call loops_set_state (LOOPS_NEED_FIXUP). > > > > But that is _not_ the appropriate way of keeping loops preserved! > I think that's done when we've scrogged the loop beyond repair and want > the structures rebuilt. IIRC, that's what you recommended we do. > > jeff
Well, I don't know if it is the 'right' thing to do, but it does seem to work. Here is a new patch that seems to be working and that also addresses the comments that Jakub Jelinek had. I want to do more testing before I officially submit it but I thought I would put it out here for others to look over again while I do that. Steve Ellcey sell...@mips.com 2014-08-12 Steve Ellcey <sell...@mips.com> PR tree-opt/54742 * Makefile.in (OBJS): Add tree-switch-shortcut.o. * common.opt (ftree-switch-shortcut): New. * opts.c (default_options_table): Add OPT_ftree_switch_shortcut. * params.def (PARAM_MAX_SWITCH_INSNS): New. (PARAM_MAX_SWITCH_PATHS): New. * passes.def (pass_tree_switch_shortcut): New. * timevar.def (TV_TREE_SWITCH_SHORTCUT): New. * tree-pass.h (make_pass_tree_switch_shortcut): New. * tree-switch-shortcut.c: New.
diff --git a/gcc/Makefile.in b/gcc/Makefile.in index 31c1f4d..94e8ec4 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -1411,6 +1411,7 @@ OBJS = \ tree-scalar-evolution.o \ tree-sra.o \ tree-switch-conversion.o \ + tree-switch-shortcut.o \ tree-ssa-address.o \ tree-ssa-alias.o \ tree-ssa-ccp.o \ diff --git a/gcc/common.opt b/gcc/common.opt index 0c4f86b..fe0664a 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -2249,6 +2249,10 @@ ftree-sra Common Report Var(flag_tree_sra) Optimization Perform scalar replacement of aggregates +ftree-switch-shortcut +Common Report Var(flag_tree_switch_shortcut) Init(0) Optimization +Convert jumps to switch statements into jumps to case statement. + ftree-ter Common Report Var(flag_tree_ter) Optimization Replace temporary expressions in the SSA->normal pass diff --git a/gcc/opts.c b/gcc/opts.c index be1867c..f1ac2e5 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -514,6 +514,7 @@ static const struct default_options default_options_table[] = { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, + { OPT_LEVELS_3_PLUS, OPT_ftree_switch_shortcut, NULL, 1 }, /* -Ofast adds optimizations to -O3. */ { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, diff --git a/gcc/params.def b/gcc/params.def index cad00e2..65377d3 100644 --- a/gcc/params.def +++ b/gcc/params.def @@ -1058,6 +1058,20 @@ DEFPARAM (PARAM_MAX_SLSR_CANDIDATE_SCAN, "strength reduction", 50, 1, 999999) +/* Maximum number of instructions to duplicate when shortcutting a switch. */ +DEFPARAM (PARAM_MAX_SWITCH_INSNS, + "max-switch-insns", + "Maximum number of instructions to duplicate when " + "shortcutting a switch statement", + 100, 1, 999999) + +/* Maximum number of paths to duplicate when shortcutting a switch. */ +DEFPARAM (PARAM_MAX_SWITCH_PATHS, + "max-switch-paths", + "Maximum number of new paths to create when" + " shortcutting a switch statement", + 50, 1, 999999) + DEFPARAM (PARAM_ASAN_STACK, "asan-stack", "Enable asan stack protection", diff --git a/gcc/passes.def b/gcc/passes.def index f13df6c..8bbf2d0 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -157,6 +157,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_cselim); NEXT_PASS (pass_copy_prop); NEXT_PASS (pass_tree_ifcombine); + NEXT_PASS (pass_tree_switch_shortcut); NEXT_PASS (pass_phiopt); NEXT_PASS (pass_tail_recursion); NEXT_PASS (pass_ch); diff --git a/gcc/timevar.def b/gcc/timevar.def index a04d05c..d9ee915 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -170,6 +170,7 @@ DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv") DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop") DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") DEFTIMEVAR (TV_COMPLETE_UNROLL , "complete unrolling") +DEFTIMEVAR (TV_TREE_SWITCH_SHORTCUT , "switch statement shortcuts") DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops") DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization") DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization") diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 1477d1f..f898e27 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -575,6 +575,7 @@ extern gimple_opt_pass *make_pass_early_inline (gcc::context *ctxt); extern gimple_opt_pass *make_pass_inline_parameters (gcc::context *ctxt); extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt); extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt); +extern gimple_opt_pass *make_pass_tree_switch_shortcut (gcc::context *ctxt); /* Current optimization pass. */ extern opt_pass *current_pass; diff --git a/gcc/tree-switch-shortcut.c b/gcc/tree-switch-shortcut.c new file mode 100644 index 0000000..21d48f8 --- /dev/null +++ b/gcc/tree-switch-shortcut.c @@ -0,0 +1,438 @@ +/* Switch shortcutting optimization for GNU C + Copyright (C) 2013 Free Software Foundation, Inc. + Contributed by Steve Ellcey (steve.ell...@imgtec.com). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. */ + +/* This file implements an optimization where, when a variable is set + to a constant value and there is a path that leads from that definition + to a switch statement that uses that variable as its controlling expression + we duplicate the blocks on this path and change the jump to the switch + statement with a direct jump to the label of the switch block that control + would goto based on the value of the variable. This can come up in + loops/switch statements that implement state machines. + + Example (modified from PR 54742): + + foo(char *str) { + int sum=0; + int state=0; + char *s=str; + for (; *s; s++) { + char c=*s; + <CODE BLOCK 1> + switch (state) { + case 0: + if (c == '+') { state = 1; sum += 9; } + else if (c != '-') { state = 2; sum += 3; } + break; + case 1: + if (c == '+') { state = 2; sum += 4; } + else if (c == '-') { state = 0; sum += 7; } + break; + case 2: + if (c == '+') { state = 0; sum += 8; } + else if (c == '-') { state = 1; sum += 2; } + break; + } + <CODE BLOCK 2> + } + return state; + } + + This pass will convert the code inside 'case 0' to something like: + + case 0: + if (c == '+') { state = 1; sum += 9; + <CODE BLOCK 2> + s++; if (!s) goto loop_exit; + <CODE BLOCK 1> + goto case_1; } + else if (c != '-') { state = 2; sum += 3; + <CODE BLOCK 2> + s++; if (!s) goto loop_exit; + <CODE BLOCK 1> + goto case_2; } + else { <CODE BLOCK 2> + s++; if (!s) goto exit; + <CODE BLOCK 1> + goto case_0; } + +Similar transformations would apply to the other parts of the switch +statement. This obviously can lead to a lot of code duplication but +it can also result in faster code since we are replacing two jumps +(one indirect) with a single direct jump. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "params.h" +#include "flags.h" +#include "tree.h" +#include "tree-pass.h" +#include "basic-block.h" +#include "function.h" +#include "hash-table.h" +#include "tree-ssa-alias.h" +#include "tree-cfg.h" +#include "tree-ssa-operands.h" +#include "tree-inline.h" +#include "gimple-expr.h" +#include "is-a.h" +#include "gimple.h" +#include "tree-phinodes.h" +#include "gimple-iterator.h" +#include "gimple-ssa.h" +#include "ssa-iterators.h" +#include "tree-into-ssa.h" +#include "cfgloop.h" + +/* Helper function for find_path, visited_bbs is used to make sure we don't + fall into an infinite loop. */ + +static int +find_path_1 (basic_block start_bb, basic_block end_bb, + hash_set<basic_block> *visited_bbs) +{ + edge_iterator ei; + edge e; + + if (start_bb == end_bb) return 1; + + if (!visited_bbs->add (start_bb)) + { + FOR_EACH_EDGE (e, ei, start_bb->succs) + if (find_path_1 (e->dest, end_bb, visited_bbs)) + return 1; + } + return 0; +} + +/* Return 1 if there is a path from start_bb to end_bb and 0 if there + is not. There may be multiple paths from start_bb to end_bb. */ + +static int +find_path (basic_block start_bb, basic_block end_bb) +{ + edge_iterator ei; + edge e; + hash_set<basic_block> visited_bbs; + int p = 0; + + if (start_bb == end_bb) return 1; + + if (!visited_bbs.add (start_bb)) + { + FOR_EACH_EDGE (e, ei, start_bb->succs) + if (find_path_1 (e->dest, end_bb, &visited_bbs)) + { + p = 1; + break; + } + } + return p; +} + + +/* We save the paths we want to copy in bbs_list_array. n_bbs_list is the + number of paths saved, bbs_list_array[i] is the list of basic blocks in + one path. Each path starts with the block where a variable is assigned + a constant value (bbs_list_array[i][0]) and ends with the switch statement + block (bbs_list_array[i][bbs_list_size[i]-2]) followed by the block that + the switch statement is going to go to given the constant value of the + variable (bbs_list_array[i][bbs_list_size[i]-1]). */ + +struct path_info +{ + basic_block **bbs_list_array; + int *val_array; + int *bbs_list_size; + int max_path_count; + int max_insn_count; + int n_bbs_list; +}; + +/* bbs_list[0] is the block with the switch statement, + bbs_list[n-1] is the block where the switch statement variable is assigned + a constant value, + The entries in between make a (reverse) path between the two. + + We don't want to change bb_list, we want to leave that alone and + and copy the path to bbs_list_array so that we wind up with a list (array) + of paths that we want to update. We also want to add the block that the + switch is going to go to on to the list so that we know which exit from + the switch statement is important. */ + +static void +save_new_path (basic_block *bbs_list, int n, tree val, path_info *pi) +{ + int i; + int insn_count; + basic_block bb; + edge switch_taken_edge; + gimple_stmt_iterator gsi; + + if (n <= 1) return; + + if (pi->n_bbs_list >= pi->max_path_count) + return; + + /* Put the blocks in 'correct' order and add in where we want to go after + the switch statement, We want to leave bbs_list untouched for future + calls. */ + + pi->bbs_list_array[pi->n_bbs_list] = XNEWVEC (basic_block, n+1); + for (i = 0; i < n; i++) + pi->bbs_list_array[pi->n_bbs_list][i] = bbs_list[n-i-1]; + + switch_taken_edge = find_taken_edge (bbs_list[0], val); + pi->bbs_list_array[pi->n_bbs_list][n] = switch_taken_edge->dest; + + pi->bbs_list_size[pi->n_bbs_list] = n + 1; + pi->val_array[pi->n_bbs_list] = (int) TREE_INT_CST_LOW (val); + + /* Count how many instructions are in the blocks we are going to + duplicate and if there are too many do not save this path + (return without incrementing n_bbs_list). */ + + insn_count = 0; + for (i = 1; i < n; i++) + { + bb = pi->bbs_list_array[pi->n_bbs_list][i]; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + insn_count += estimate_num_insns (gsi_stmt (gsi), &eni_size_weights); + } + + if (insn_count > pi->max_insn_count) + return; + + pi->n_bbs_list = pi->n_bbs_list + 1; +} + +/* switch_stmt is a switch statement whose switch index expression + is the variable expr. We trace the value of the variable back + through any phi nodes looking for places where it gets a constant + value and save the path in bbs_list. Then we call save_new_path + to create a list of such paths. */ + +static void +process_switch (tree expr, gimple switch_stmt, + hash_set<gimple> *visited_phis, + basic_block *bbs_list, int n, + path_info *pi) +{ + gimple def_stmt; + tree var; + unsigned int i; + edge e; + edge_iterator ei; + basic_block bbx; + basic_block var_bb; + int e_count; + + gcc_assert (gimple_code (switch_stmt) == GIMPLE_SWITCH); + var = SSA_NAME_VAR (expr); + def_stmt = SSA_NAME_DEF_STMT (expr); + var_bb = gimple_bb (def_stmt); + + if (var == NULL || var_bb == NULL) return; + + /* We have a variable definition (var) that is defined in var_bb, + We want to put the path from var_bb to the current bb into the + bbs_list. If there is more then one path, skip this and don't + try to do the optimization. */ + + bbx = bbs_list[n-1]; + while (bbx != var_bb) + { + e_count = 0; + FOR_EACH_EDGE (e, ei, bbx->preds) + if (find_path (var_bb, e->src)) + { + bbs_list[n] = e->src; + n = n + 1; + e_count = e_count + 1; + } + if (e_count != 1) return; + bbx = bbs_list[n-1]; + } + + if (gimple_code (def_stmt) == GIMPLE_PHI + && !visited_phis->add (def_stmt)) + { + for (i = 0; i < gimple_phi_num_args (def_stmt); i++) + { + tree arg = gimple_phi_arg_def (def_stmt, i); + if (arg && TREE_CODE (arg) == INTEGER_CST) + { + /* const char *name = IDENTIFIER_POINTER (DECL_NAME (var)); */ + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; + save_new_path (bbs_list, n + 1, arg, pi); + } + else if (arg && TREE_CODE (arg) == SSA_NAME) + { + bbs_list[n] = gimple_phi_arg_edge (def_stmt, i)->src; + process_switch (arg, switch_stmt, visited_phis, bbs_list, n+1, pi); + } + } + } +} + +/* Find paths that lead from blocks where a variable is assigned a constant + value to a switch statement where that variable is used as the switch + index. Save the paths in bbs_list_array so that they can be processed + by copy_switch_paths. */ + +static unsigned int +find_switch_shortcuts (function *fun, path_info *pi) +{ + basic_block bb; + hash_set<gimple> visited_phis; + basic_block *bbs_list; + int n = 1; + + bbs_list = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun)); + FOR_EACH_BB_FN (bb, fun) + { + gimple stmt = last_stmt (bb); + if (stmt && gimple_code (stmt) == GIMPLE_SWITCH) + { + tree op = gimple_switch_index (stmt); + tree var = SSA_NAME_VAR (op); + if (var) + { + bbs_list[0] = bb; + process_switch (op, stmt, &visited_phis, bbs_list, n, pi); + } + } + } + XDELETEVEC (bbs_list); + return 0; +} + +/* Call gimple_duplicate_sese_region to douplicate the blocks in bb_list. + We free and recalculate all ssa and dominance information afterwords + because the region being copied is not really SESE and so we cannot + trust gimple_duplicate_sese_region to correctly update the dataflow + information. */ + +static void +duplicate_blocks (basic_block *bb_list, int bb_count) +{ + edge orig_edge, exit_edge; + loop_p loop; + + orig_edge = find_edge (bb_list[0], bb_list[1]); + exit_edge = find_edge (bb_list[bb_count-2], bb_list[bb_count-1]); + gimple_duplicate_sese_region (orig_edge, exit_edge, &bb_list[1], + bb_count-2, NULL, false); + free_dominance_info (CDI_DOMINATORS); + update_ssa (TODO_update_ssa); + calculate_dominance_info (CDI_DOMINATORS); + FOR_EACH_LOOP (loop, 0) + { + loop->latch = NULL; + loop->header = NULL; + } + loops_state_set (LOOPS_NEED_FIXUP); +} + +/* Go through the paths saved in bbs_list_array and make copies of them. */ + +static void +copy_switch_paths (path_info *pi) +{ + int i; + + /* Process each path in bbs_list_size. */ + for (i = 0; i < pi->n_bbs_list; i++) + { + /* For each path in bbs_list_size loop through and copy each block in + the path (except the first on where the constant is assigned and + the final one where the switch statement goes to. */ + + if (!single_pred_p (pi->bbs_list_array[i][1])) + duplicate_blocks (pi->bbs_list_array[i], pi->bbs_list_size[i]); + } +} + + +/* Main entry for the tree if-conversion pass. */ + +namespace { + +const pass_data pass_data_tree_switch_shortcut = +{ + GIMPLE_PASS, /* type */ + "switch_shortcut", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_TREE_SWITCH_SHORTCUT, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_update_ssa, /* todo_flags_finish */ +}; + +class pass_tree_switch_shortcut : public gimple_opt_pass +{ +public: + pass_tree_switch_shortcut (gcc::context *ctxt) + : gimple_opt_pass (pass_data_tree_switch_shortcut, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_tree_switch_shortcut; + } + virtual unsigned int execute (function *); + +}; // class pass_tree_switch_shortcut + +unsigned int +pass_tree_switch_shortcut::execute (function *fun) +{ + int i; + path_info *pi; + + pi = XNEW (path_info); + pi->n_bbs_list = 0; + pi->max_insn_count = PARAM_VALUE (PARAM_MAX_SWITCH_INSNS); + pi->max_path_count = PARAM_VALUE (PARAM_MAX_SWITCH_PATHS); + pi->val_array = XNEWVEC (int, pi->max_path_count); + pi->bbs_list_size = XNEWVEC (int, pi->max_path_count); + pi->bbs_list_array = XNEWVEC (basic_block *, pi->max_path_count); + find_switch_shortcuts (fun, pi); + copy_switch_paths (pi); + XDELETEVEC (pi->val_array); + XDELETEVEC (pi->bbs_list_size); + for (i = 0; i < pi->n_bbs_list; i++) + XDELETEVEC (pi->bbs_list_array[i]); + XDELETEVEC (pi->bbs_list_array); + XDELETE (pi); + return 0; +} + +} // anon namespace + +gimple_opt_pass * +make_pass_tree_switch_shortcut (gcc::context *ctxt) +{ + return new pass_tree_switch_shortcut (ctxt); +}