On 13 June 2016 at 16:13, Jan Hubicka <hubi...@ucw.cz> wrote: >> diff --git a/gcc/cgraph.h b/gcc/cgraph.h >> index ecafe63..41ac408 100644 >> --- a/gcc/cgraph.h >> +++ b/gcc/cgraph.h >> @@ -1874,6 +1874,9 @@ public: >> if we did not do any inter-procedural code movement. */ >> unsigned used_by_single_function : 1; >> >> + /* Set if -fsection-anchors is set. */ >> + unsigned section_anchor : 1; >> + >> private: >> /* Assemble thunks and aliases associated to varpool node. */ >> void assemble_aliases (void); >> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c >> index 4bfcad7..e75d5c0 100644 >> --- a/gcc/cgraphunit.c >> +++ b/gcc/cgraphunit.c >> @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) >> it is available to notice_global_symbol. */ >> node->definition = true; >> notice_global_symbol (decl); >> + >> + node->section_anchor = flag_section_anchors; >> + >> if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) >> /* Traditionally we do not eliminate static variables when not >> optimizing and when not doing toplevel reoder. */ >> diff --git a/gcc/common.opt b/gcc/common.opt >> index f0d7196..e497795 100644 >> --- a/gcc/common.opt >> +++ b/gcc/common.opt >> @@ -1590,6 +1590,10 @@ fira-algorithm= >> Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) >> Init(IRA_ALGORITHM_CB) Optimization >> -fira-algorithm=[CB|priority] Set the used IRA algorithm. >> >> +fipa-increase_alignment >> +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization >> +Option to gate increase_alignment ipa pass. >> + >> Enum >> Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA >> algorithm %qs) >> >> @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) >> Init(1) Optimization >> Enable the dependent count heuristic in the scheduler. >> >> fsection-anchors >> -Common Report Var(flag_section_anchors) Optimization >> +Common Report Var(flag_section_anchors) >> Access data in the same section from shared anchor points. >> >> fsee >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index a0db3a4..1482566 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -8252,6 +8252,8 @@ aarch64_override_options (void) >> >> aarch64_register_fma_steering (); >> >> + /* Enable increase_alignment pass. */ >> + flag_ipa_increase_alignment = 1; > > I would rather enable it always on targets that do support anchors. AFAIK aarch64 supports section anchors. >> diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c >> index ce9e146..7f09f3a 100644 >> --- a/gcc/lto/lto-symtab.c >> +++ b/gcc/lto/lto-symtab.c >> @@ -342,6 +342,13 @@ lto_symtab_merge (symtab_node *prevailing, symtab_node >> *entry) >> The type compatibility checks or the completing of types has properly >> dealt with most issues. */ >> >> + /* ??? is this assert necessary ? */ >> + varpool_node *v_prevailing = dyn_cast<varpool_node *> (prevailing); >> + varpool_node *v_entry = dyn_cast<varpool_node *> (entry); >> + gcc_assert (v_prevailing && v_entry); >> + /* section_anchor of prevailing_decl wins. */ >> + v_entry->section_anchor = v_prevailing->section_anchor; >> + > Other flags are merged in lto_varpool_replace_node so please move this there. Ah indeed, thanks for the pointers. I wonder though if we need to set prevailing_node->section_anchor = vnode->section_anchor ? IIUC, the function merges flags from vnode into prevailing_node and removes vnode. However we want prevailing_node->section_anchor to always take precedence. >> +/* Return true if alignment should be increased for this vnode. >> + This is done if every function that references/referring to vnode >> + has flag_tree_loop_vectorize set. */ >> + >> +static bool >> +increase_alignment_p (varpool_node *vnode) >> +{ >> + ipa_ref *ref; >> + >> + for (int i = 0; vnode->iterate_reference (i, ref); i++) >> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referred)) >> + { >> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >> + if (!opts->x_flag_tree_loop_vectorize) >> + return false; >> + } > > If you take address of function that has vectorizer enabled probably doesn't > imply need to increase alignment of that var. So please drop the loop. > > You only want function that read/writes or takes address of the symbol. But > onthe other hand, you need to walk all aliases of the symbol by > call_for_symbol_and_aliases >> + >> + for (int i = 0; vnode->iterate_referring (i, ref); i++) >> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referring)) >> + { >> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >> + if (!opts->x_flag_tree_loop_vectorize) >> + return false; >> + } >> + >> + return true; >> +} >> + >> /* Entry point to increase_alignment pass. */ >> static unsigned int >> increase_alignment (void) >> @@ -914,9 +942,12 @@ increase_alignment (void) >> tree decl = vnode->decl; >> unsigned int alignment; >> >> - if ((decl_in_symtab_p (decl) >> - && !symtab_node::get (decl)->can_increase_alignment_p ()) >> - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) >> + if (!vnode->section_anchor >> + || (decl_in_symtab_p (decl) >> + && !symtab_node::get (decl)->can_increase_alignment_p ()) >> + || DECL_USER_ALIGN (decl) >> + || DECL_ARTIFICIAL (decl) >> + || !increase_alignment_p (vnode)) > > Incrementally we probably should do more testing whether the variable looks > like > someting that can be vectorized, i.e. it contains array, has address taken or > the > accesses are array accesses within loop. > This can be done by the analysis phase of the IPA pass inspecting the function > bodies. Thanks, I will try to check for array accesses are within a loop in followup patch. I was wondering if we could we treat a homogeneous global struct (having members of one type), as a global array of that type and increase it's alignment if required ? > > I think it is important waste to bump up everything including error messages > etc. > At least on i386 the effect on firefox datasegment of various alignment > setting is > very visible. Um for a start, would it be OK to check if all functions referencing variable have attribute noreturn, and in that case we skip increasing the alignment ? I suppose that error functions would be having attribute noreturn set ? > > Looks OK to me otherwise. please send updated patch. I have done the changes in the attached patch (stage-1 built). I am not sure what to return from the callback function and arbitrarily chose to return true.
Thanks, Prathamesh > > Honza
diff --git a/gcc/cgraph.h b/gcc/cgraph.h index ecafe63..41ac408 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1874,6 +1874,9 @@ public: if we did not do any inter-procedural code movement. */ unsigned used_by_single_function : 1; + /* Set if -fsection-anchors is set. */ + unsigned section_anchor : 1; + private: /* Assemble thunks and aliases associated to varpool node. */ void assemble_aliases (void); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 4bfcad7..e75d5c0 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) it is available to notice_global_symbol. */ node->definition = true; notice_global_symbol (decl); + + node->section_anchor = flag_section_anchors; + if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) /* Traditionally we do not eliminate static variables when not optimizing and when not doing toplevel reoder. */ diff --git a/gcc/common.opt b/gcc/common.opt index f0d7196..e497795 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1590,6 +1590,10 @@ fira-algorithm= Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization -fira-algorithm=[CB|priority] Set the used IRA algorithm. +fipa-increase_alignment +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization +Option to gate increase_alignment ipa pass. + Enum Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA algorithm %qs) @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1) Optimization Enable the dependent count heuristic in the scheduler. fsection-anchors -Common Report Var(flag_section_anchors) Optimization +Common Report Var(flag_section_anchors) Access data in the same section from shared anchor points. fsee diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a0db3a4..1482566 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -8252,6 +8252,8 @@ aarch64_override_options (void) aarch64_register_fma_steering (); + /* Enable increase_alignment pass. */ + flag_ipa_increase_alignment = 1; } /* Implement targetm.override_options_after_change. */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 3503c15..b7f448e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -3458,6 +3458,9 @@ arm_option_override (void) /* Init initial mode for testing. */ thumb_flipper = TARGET_THUMB; + + /* Enable increase_alignment pass. */ + flag_ipa_increase_alignment = 1; } static void diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2d7df6b..ed59068 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5011,6 +5011,9 @@ rs6000_option_override (void) = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE }; register_pass (&analyze_swaps_info); + + /* Enable increase_alignment pass. */ + flag_ipa_increase_alignment = 1; } diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 5cef2ba..289d9c3 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -627,6 +627,7 @@ lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node *node, bp_pack_value (&bp, node->tls_model, 3); bp_pack_value (&bp, node->used_by_single_function, 1); bp_pack_value (&bp, node->need_bounds_init, 1); + bp_pack_value (&bp, node->section_anchor, 1); streamer_write_bitpack (&bp); group = node->get_comdat_group (); @@ -1401,6 +1402,7 @@ input_varpool_node (struct lto_file_decl_data *file_data, node->tls_model = (enum tls_model)bp_unpack_value (&bp, 3); node->used_by_single_function = (enum tls_model)bp_unpack_value (&bp, 1); node->need_bounds_init = bp_unpack_value (&bp, 1); + node->section_anchor = bp_unpack_value (&bp, 1); group = read_identifier (ib); if (group) { diff --git a/gcc/passes.def b/gcc/passes.def index 3647e90..3a8063c 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -138,12 +138,12 @@ along with GCC; see the file COPYING3. If not see PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile) NEXT_PASS (pass_feedback_split_functions); POP_INSERT_PASSES () - NEXT_PASS (pass_ipa_increase_alignment); NEXT_PASS (pass_ipa_tm); NEXT_PASS (pass_ipa_lower_emutls); TERMINATE_PASS_LIST (all_small_ipa_passes) INSERT_PASSES_AFTER (all_regular_ipa_passes) + NEXT_PASS (pass_ipa_increase_alignment); NEXT_PASS (pass_ipa_whole_program_visibility); NEXT_PASS (pass_ipa_profile); NEXT_PASS (pass_ipa_icf); diff --git a/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c new file mode 100644 index 0000000..74eaed8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target section_anchors } */ +/* { dg-require-effective-target vect_int } */ + +#define N 32 + +/* Clone of section-anchors-vect-70.c with foo() having -fno-tree-loop-vectorize. */ + +static struct A { + int p1, p2; + int e[N]; +} a, b, c; + +__attribute__((optimize("-fno-tree-loop-vectorize"))) +int foo(void) +{ + for (int i = 0; i < N; i++) + a.e[i] = b.e[i] + c.e[i]; + + return a.e[0]; +} + +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target arm*-*-* } } } */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 36299a6..d36aa1d 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -483,7 +483,7 @@ extern simple_ipa_opt_pass *make_pass_local_optimization_passes (gcc::context *c extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context *ctxt); -extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context +extern ipa_opt_pass_d *make_pass_ipa_increase_alignment (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt); diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 2669813..c693950 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -899,6 +899,55 @@ get_vec_alignment_for_type (tree type) return (alignment > TYPE_ALIGN (type)) ? alignment : 0; } +/* Return true if alignment should be increased for this vnode. + This is done if every function that referring to vnode + has flag_tree_loop_vectorize set. */ + +static bool +increase_alignment_p (varpool_node *vnode) +{ + ipa_ref *ref; + cgraph_node *cnode; + + for (int i = 0; vnode->iterate_referring (i, ref); i++) + /* Walk those functions that read/write/take address of vnode. */ + if ((cnode = dyn_cast<cgraph_node *> (ref->referring)) + && (ref->use == IPA_REF_LOAD || ref->use == IPA_REF_STORE || ref->use == IPA_REF_ADDR)) + { + struct cl_optimization *opts = opts_for_fn (cnode->decl); + if (!opts->x_flag_tree_loop_vectorize) + return false; + } + + return true; +} + +static bool +increase_alignment_callback (varpool_node *vnode, void *data ATTRIBUTE_UNUSED) +{ + tree decl = vnode->decl; + + if (!vnode->section_anchor + || (decl_in_symtab_p (decl) + && !symtab_node::get (decl)->can_increase_alignment_p ()) + || DECL_USER_ALIGN (decl) + || DECL_ARTIFICIAL (decl) + || !increase_alignment_p (vnode)) + return true; + + unsigned alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); + if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) + { + vnode->increase_alignment (alignment); + dump_printf (MSG_NOTE, "Increasing alignment of decl: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); + dump_printf (MSG_NOTE, "\n"); + return true; + } + + return true; +} + /* Entry point to increase_alignment pass. */ static unsigned int increase_alignment (void) @@ -910,24 +959,7 @@ increase_alignment (void) /* Increase the alignment of all global arrays for vectorization. */ FOR_EACH_DEFINED_VARIABLE (vnode) - { - tree decl = vnode->decl; - unsigned int alignment; - - if ((decl_in_symtab_p (decl) - && !symtab_node::get (decl)->can_increase_alignment_p ()) - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) - continue; - - alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); - if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) - { - vnode->increase_alignment (alignment); - dump_printf (MSG_NOTE, "Increasing alignment of decl: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); - dump_printf (MSG_NOTE, "\n"); - } - } + vnode->call_for_symbol_and_aliases (increase_alignment_callback, NULL, true); delete type_align_map; return 0; @@ -938,7 +970,7 @@ namespace { const pass_data pass_data_ipa_increase_alignment = { - SIMPLE_IPA_PASS, /* type */ + IPA_PASS, /* type */ "increase_alignment", /* name */ OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ TV_IPA_OPT, /* tv_id */ @@ -949,17 +981,26 @@ const pass_data pass_data_ipa_increase_alignment = 0, /* todo_flags_finish */ }; -class pass_ipa_increase_alignment : public simple_ipa_opt_pass +class pass_ipa_increase_alignment : public ipa_opt_pass_d { public: pass_ipa_increase_alignment (gcc::context *ctxt) - : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) + : ipa_opt_pass_d (pass_data_ipa_increase_alignment, ctxt, + NULL, /* generate_summary */ + NULL, /* write summary */ + NULL, /* read summary */ + NULL, /* write optimization summary */ + NULL, /* read optimization summary */ + NULL, /* stmt fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* transform function */ + NULL )/* variable transform */ {} /* opt_pass methods: */ virtual bool gate (function *) { - return flag_section_anchors && flag_tree_loop_vectorize; + return flag_ipa_increase_alignment != 0; } virtual unsigned int execute (function *) { return increase_alignment (); } @@ -968,7 +1009,7 @@ public: } // anon namespace -simple_ipa_opt_pass * +ipa_opt_pass_d * make_pass_ipa_increase_alignment (gcc::context *ctxt) { return new pass_ipa_increase_alignment (ctxt);