Hi Richard,
Richard Biener <richard.guent...@gmail.com> writes: > On Thu, Nov 12, 2020 at 11:11 AM Frederik Harwath > <frede...@codesourcery.com> wrote: >> >> This patch enables the use of Graphite for the analysis of OpenACC >> "auto" loops. [...] >> Furthermore, Graphite is extended by functionality that extends >> its applicability to real-world code (e.g. runtime alias checking). > > I wonder if this can be split into a refactoring of graphite and adding > runtime alias capability and a part doing the OpenACC pieces. > Yes, I did not remove the runtime alias checking from this WIP-patch, but I planned to submit it separately. I am going to do this soon. Frederik > Richard. > >> --- >> gcc/common.opt | 8 + >> gcc/graphite-dependences.c | 12 +- >> gcc/graphite-isl-ast-to-gimple.c | 77 +- >> gcc/graphite-oacc.h | 90 ++ >> gcc/graphite-scop-detection.c | 828 ++++++++++++++---- >> gcc/graphite-sese-to-poly.c | 26 +- >> gcc/graphite.c | 403 ++++++++- >> gcc/graphite.h | 11 +- >> gcc/internal-fn.h | 7 +- >> gcc/omp-expand.c | 26 +- >> gcc/omp-offload.c | 173 +++- >> gcc/predict.c | 2 +- >> .../graphite/alias-0-no-runtime-check.c | 20 + >> .../gcc.dg/graphite/alias-0-runtime-check.c | 21 + >> gcc/testsuite/gcc.dg/graphite/alias-1.c | 22 + >> gcc/tree-chrec-oacc.h | 45 + >> gcc/tree-chrec.c | 16 +- >> gcc/tree-data-ref.c | 112 ++- >> gcc/tree-data-ref.h | 8 +- >> gcc/tree-loop-distribution.c | 17 +- >> gcc/tree-scalar-evolution.c | 257 +++++- >> gcc/tree-ssa-loop-ivcanon.c | 9 +- >> gcc/tree-ssa-loop-niter.c | 13 + >> 23 files changed, 1870 insertions(+), 333 deletions(-) >> create mode 100644 gcc/graphite-oacc.h >> create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c >> create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c >> create mode 100644 gcc/testsuite/gcc.dg/graphite/alias-1.c >> create mode 100644 gcc/tree-chrec-oacc.h >> >> diff --git a/gcc/common.opt b/gcc/common.opt >> index dfed6ec76ba..caaeaa1aa6f 100644 >> --- 
a/gcc/common.opt >> +++ b/gcc/common.opt >> @@ -1600,6 +1600,14 @@ fgraphite-identity >> Common Report Var(flag_graphite_identity) Optimization >> Enable Graphite Identity transformation. >> >> +fgraphite-non-affine-accesses >> +Common Report Var(flag_graphite_non_affine_accesses) Init(0) >> +Allow Graphite to handle non-affine data accesses. >> + >> +fgraphite-runtime-alias-checks >> +Common Report Var(flag_graphite_runtime_alias_checks) Optimization Init(1) >> +Allow Graphite to add runtime alias checks to loops if aliasing cannot be >> resolved statically. >> + >> fhoist-adjacent-loads >> Common Report Var(flag_hoist_adjacent_loads) Optimization >> Enable hoisting adjacent loads to encourage generating conditional move >> diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c >> index 7078c949800..76ba027cdf3 100644 >> --- a/gcc/graphite-dependences.c >> +++ b/gcc/graphite-dependences.c >> @@ -82,7 +82,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> { >> if (dump_file) >> { >> - fprintf (dump_file, "Adding read to depedence graph: "); >> + fprintf (dump_file, "Adding read to dependence graph: "); >> print_pdr (dump_file, pdr); >> } >> isl_union_map *um >> @@ -90,7 +90,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> reads = isl_union_map_union (reads, um); >> if (dump_file) >> { >> - fprintf (dump_file, "Reads depedence graph: "); >> + fprintf (dump_file, "Reads dependence graph: "); >> print_isl_union_map (dump_file, reads); >> } >> } >> @@ -98,7 +98,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> { >> if (dump_file) >> { >> - fprintf (dump_file, "Adding must write to depedence graph: >> "); >> + fprintf (dump_file, "Adding must write to dependence graph: >> "); >> print_pdr (dump_file, pdr); >> } >> isl_union_map *um >> @@ -106,7 +106,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> must_writes = isl_union_map_union (must_writes, um); >> if 
(dump_file) >> { >> - fprintf (dump_file, "Must writes depedence graph: "); >> + fprintf (dump_file, "Must writes dependence graph: "); >> print_isl_union_map (dump_file, must_writes); >> } >> } >> @@ -114,7 +114,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> { >> if (dump_file) >> { >> - fprintf (dump_file, "Adding may write to depedence graph: "); >> + fprintf (dump_file, "Adding may write to dependence graph: >> "); >> print_pdr (dump_file, pdr); >> } >> isl_union_map *um >> @@ -122,7 +122,7 @@ scop_get_reads_and_writes (scop_p scop, isl_union_map >> *&reads, >> may_writes = isl_union_map_union (may_writes, um); >> if (dump_file) >> { >> - fprintf (dump_file, "May writes depedence graph: "); >> + fprintf (dump_file, "May writes dependence graph: "); >> print_isl_union_map (dump_file, may_writes); >> } >> } >> diff --git a/gcc/graphite-isl-ast-to-gimple.c >> b/gcc/graphite-isl-ast-to-gimple.c >> index ef93fda2233..98c61ff864e 100644 >> --- a/gcc/graphite-isl-ast-to-gimple.c >> +++ b/gcc/graphite-isl-ast-to-gimple.c >> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3. 
If not see >> #include "tree-ssa.h" >> #include "tree-vectorizer.h" >> #include "graphite.h" >> +#include "graphite-oacc.h" >> >> struct ast_build_info >> { >> @@ -635,12 +636,18 @@ translate_isl_ast_for_loop (loop_p context_loop, >> redirect_edge_succ_nodup (next_e, after); >> set_immediate_dominator (CDI_DOMINATORS, next_e->dest, next_e->src); >> >> - if (flag_loop_parallelize_all) >> + if (flag_loop_parallelize_all || oacc_function_p (cfun)) >> { >> isl_id *id = isl_ast_node_get_annotation (node_for); >> gcc_assert (id); >> ast_build_info *for_info = (ast_build_info *) isl_id_get_user (id); >> loop->can_be_parallel = for_info->is_parallelizable; >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + { >> + dump_user_location_t loc = find_loop_location (loop); >> + dump_printf_loc (MSG_NOTE, loc, "loop can be parallel: %d \n", >> + loop->can_be_parallel); >> + } >> free (for_info); >> isl_id_free (id); >> } >> @@ -1027,7 +1034,7 @@ gsi_insert_earliest (gimple_seq seq) >> basic_block begin_bb = get_entry_bb (codegen_region); >> >> /* Inserting the gimple statements in a vector because gimple_seq behave >> - in strage ways when inserting the stmts from it into different basic >> + in strange ways when inserting the stmts from it into different basic >> blocks one at a time. */ >> auto_vec<gimple *, 3> stmts; >> for (gimple_stmt_iterator gsi = gsi_start (seq); !gsi_end_p (gsi); >> @@ -1397,7 +1404,7 @@ scop_to_isl_ast (scop_p scop) >> (isl_schedule_copy (scop->transformed_schedule), set_separate_option, >> NULL); >> isl_ast_build *context_isl = generate_isl_context (scop); >> >> - if (flag_loop_parallelize_all) >> + if (flag_loop_parallelize_all || oacc_function_p (cfun)) >> { >> scop_get_dependences (scop); >> context_isl = >> @@ -1464,6 +1471,42 @@ generate_entry_out_of_ssa_copies (edge false_entry, >> } >> } >> >> +/* Defined in tree-loop-distribution.c */ >> +/* TODO Move this function to tree-data-ref.c? 
*/ >> + >> +void >> +compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs, >> + vec<dr_with_seg_len_pair_t> *comp_alias_pairs); >> + >> + >> +/* Create a condition that evaluates to TRUE if some ALIAS_DDRS >> + do alias. */ >> + >> +static tree >> +generate_alias_cond (vec<ddr_p>& alias_ddrs, loop_p context_loop) >> +{ >> + gcc_checking_assert (flag_graphite_runtime_alias_checks >> + && alias_ddrs.length () > 0); >> + gcc_checking_assert (context_loop); >> + >> + auto_vec<dr_with_seg_len_pair_t> check_pairs; >> + compute_alias_check_pairs (context_loop, &alias_ddrs, &check_pairs); >> + gcc_checking_assert (check_pairs.length () > 0); >> + >> + tree alias_cond = NULL_TREE; >> + create_runtime_alias_checks (context_loop, &check_pairs, &alias_cond); >> + gcc_checking_assert (alias_cond); >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + { >> + fprintf (dump_file, "Generated runtime alias check: "); >> + print_generic_expr (dump_file, alias_cond, dump_flags); >> + fprintf (dump_file, "\n"); >> + } >> + >> + return alias_cond; >> +} >> + >> /* GIMPLE Loop Generator: generates loops in GIMPLE form for the given SCOP. >> Return true if code generation succeeded. */ >> >> @@ -1504,12 +1547,38 @@ graphite_regenerate_ast_isl (scop_p scop) >> region->if_region = if_region; >> >> loop_p context_loop = region->region.entry->src->loop_father; >> + gcc_checking_assert (context_loop); >> edge e = single_succ_edge (if_region->true_region->region.entry->dest); >> basic_block bb = split_edge (e); >> >> /* Update the true_region exit edge. */ >> region->if_region->true_region->region.exit = single_succ_edge (bb); >> >> + if (flag_graphite_runtime_alias_checks >> + && scop->unhandled_alias_ddrs.length () > 0) >> + { >> + /* SCoP detection has failed to handle the aliasing between some >> + data-references of the SCoP statically. 
Generate an alias >> + check that selects the newly generated version of the SCoP >> + (in the true-branch of the conditional) if aliasing can be >> + ruled out at runtime and the original version of the SCoP, >> + otherwise. */ >> + >> + loop_p loop >> + = find_common_loop (scop->scop_info->region.entry->dest->loop_father, >> + scop->scop_info->region.exit->src->loop_father); >> + >> + tree cond = generate_alias_cond (scop->unhandled_alias_ddrs, loop); >> + tree non_alias_cond = build1 (TRUTH_NOT_EXPR, boolean_type_node, >> cond); >> + set_ifsese_condition (region->if_region, non_alias_cond); >> + /* The loop nest is shared by all DDRs, cf. build_alias_set. */ >> + DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release (); >> + free_dependence_relations (scop->unhandled_alias_ddrs); >> + } >> + >> + if (dump_file) >> + fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n"); >> + >> t.translate_isl_ast (context_loop, root_node, e, ip); >> if (! t.codegen_error_p ()) >> { >> @@ -1520,8 +1589,6 @@ graphite_regenerate_ast_isl (scop_p scop) >> if_region->region->region.exit->src, >> if_region->false_region->region.exit, >> if_region->true_region->region.exit); >> - if (dump_file) >> - fprintf (dump_file, "[codegen] isl AST to Gimple succeeded.\n"); >> } >> >> if (t.codegen_error_p ()) >> diff --git a/gcc/graphite-oacc.h b/gcc/graphite-oacc.h >> new file mode 100644 >> index 00000000000..5978f428974 >> --- /dev/null >> +++ b/gcc/graphite-oacc.h >> @@ -0,0 +1,90 @@ >> +/* Graphite OpenACC helpers >> + Copyright (C) 2006-2020 Free Software Foundation, Inc. >> + Contributed by Sebastian Pop <sebastian....@inria.fr>. >> + >> +This file is part of GCC. >> + >> +GCC is free software; you can redistribute it and/or modify >> +it under the terms of the GNU General Public License as published by >> +the Free Software Foundation; either version 3, or (at your option) >> +any later version. 
>> + >> +GCC is distributed in the hope that it will be useful, >> +but WITHOUT ANY WARRANTY; without even the implied warranty of >> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> +GNU General Public License for more details. >> + >> +You should have received a copy of the GNU General Public License >> +along with GCC; see the file COPYING3. If not see >> +<http://www.gnu.org/licenses/>. */ >> + >> +/* This pass converts GIMPLE to GRAPHITE, performs some loop >> + transformations and then converts the resulting representation back >> + to GIMPLE. >> + >> + An early description of this pass can be found in the GCC Summit'06 >> + paper "GRAPHITE: Polyhedral Analyses and Optimizations for GCC". >> + The wiki page http://gcc.gnu.org/wiki/Graphite contains pointers to >> + the related work. */ >> + >> +#include "omp-general.h" >> +#include "attribs.h" >> + >> +static inline bool oacc_function_p (function *fun) >> +{ >> + return oacc_get_fn_attrib (fun->decl); >> +} >> + >> +/* Represents a field of the ".omp_data_i" argument of >> + an outlined OpenACC function. Each such field >> + is used to pass a unique variable from the function >> + that originally contained the loop to the outlined >> + function. */ >> + >> +struct oacc_data_field { >> + /* The variable of the source function that >> + gets passed through this field. */ >> + tree src_var; >> + >> + /* The variable that holds the dereferenced value of the >> + field. This might be left NULL for reduction variables. */ >> + // TODO-kernels Should we also set this for reduction variables? >> + // This seems to be unnecessary since we do not create data-refs >> + // for reduction variables. 
>> + >> + tree tgt_var; >> +}; >> + >> +class oacc_omp_data >> +{ >> +private: >> + hash_map<tree, oacc_data_field> field_map; >> + >> + void gather_assignments (struct function *fn); >> + tree get_accessed_field (tree t); >> +public: >> + tree src_fn_arg; >> + tree tgt_fn_arg; >> + gimple* src_fn_def; >> + // TODO-kernels This belongs into the oacc_context >> + loop_p loop; >> + static oacc_omp_data construct (struct function* fn); >> + tree redirect_data_ref (tree ref); >> +}; >> + >> +class oacc_context { >> +public: >> + oacc_omp_data omp_data; >> + >> +private: >> + bool valid; >> + oacc_context () : omp_data (), valid (false) {} >> + oacc_context (oacc_omp_data omp_data) : >> + omp_data (omp_data), >> + valid (true) {} >> +public: >> + static oacc_context build_context (); >> + static oacc_context invalid_context () { return oacc_context (); } >> + >> + bool is_valid () { return valid; } >> +}; >> diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c >> index 75f81227f8a..ccdf3aa4d5b 100644 >> --- a/gcc/graphite-scop-detection.c >> +++ b/gcc/graphite-scop-detection.c >> @@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-into-ssa.h" >> #include "tree-ssa.h" >> #include "cfgloop.h" >> +#include "tree-chrec-oacc.h" >> #include "tree-data-ref.h" >> #include "tree-scalar-evolution.h" >> #include "tree-pass.h" >> @@ -49,6 +50,9 @@ along with GCC; see the file COPYING3. 
If not see >> #include "gimple-pretty-print.h" >> #include "cfganal.h" >> #include "graphite.h" >> +#include "omp-general.h" >> +#include "graphite-oacc.h" >> +#include "print-tree.h" >> >> class debug_printer >> { >> @@ -69,12 +73,27 @@ public: >> fprintf (output.dump_file, "%d", i); >> return output; >> } >> + >> friend debug_printer & >> operator<< (debug_printer &output, const char *s) >> { >> fprintf (output.dump_file, "%s", s); >> return output; >> } >> + >> + friend debug_printer & >> + operator<< (debug_printer &output, gimple* stmt) >> + { >> + print_gimple_stmt (output.dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS); >> + return output; >> + } >> + >> + friend debug_printer & >> + operator<< (debug_printer &output, tree t) >> + { >> + print_generic_expr (output.dump_file, t, TDF_SLIM); >> + return output; >> + } >> } dp; >> >> #define DEBUG_PRINT(args) do \ >> @@ -286,7 +305,8 @@ namespace >> class scop_detection >> { >> public: >> - scop_detection () : scops (vNULL) {} >> + scop_detection (oacc_context *oacc_ctx) >> + : scops (vNULL), oacc_ctx (oacc_ctx) {} >> >> ~scop_detection () >> { >> @@ -354,24 +374,6 @@ public: >> bool stmt_simple_for_scop_p (sese_l scop, gimple *stmt, >> basic_block bb) const; >> >> - /* Something like "n * m" is not allowed. */ >> - >> - static bool graphite_can_represent_init (tree e); >> - >> - /* Return true when SCEV can be represented in the polyhedral model. >> - >> - An expression can be represented, if it can be expressed as an >> - affine expression. For loops (i, j) and parameters (m, n) all >> - affine expressions are of the form: >> - >> - x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z >> - >> - 1 i + 20 j + (-2) m + 25 >> - >> - Something like "i * n" or "n * m" is not allowed. */ >> - >> - static bool graphite_can_represent_scev (sese_l scop, tree scev); >> - >> /* Return true when EXPR can be represented in the polyhedral model. 
>> >> This means an expression can be represented, if it is linear with >> respect >> @@ -382,9 +384,9 @@ public: >> tree expr); >> >> /* Return true if the data references of STMT can be represented by >> Graphite. >> - We try to analyze the data references in a loop contained in the SCOP. >> */ >> + We try to analyze the data references in a loop contained in the SCOP. >> */ >> >> - static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt); >> + static bool stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, >> oacc_context *oacc_ctx); >> >> /* Remove the close phi node at GSI and replace its rhs with the rhs >> of PHI. */ >> @@ -403,6 +405,7 @@ public: >> >> private: >> vec<sese_l> scops; >> + oacc_context *oacc_ctx; >> }; >> >> sese_l scop_detection::invalid_sese (NULL, NULL); >> @@ -560,14 +563,58 @@ scop_detection::can_represent_loop (loop_p loop, >> sese_l scop) >> || !single_pred_p (loop->latch) >> || exit->src != single_pred (loop->latch) >> || !empty_block_p (loop->latch)) >> - return false; >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop shape invalid.\n"); >> + return false; >> + >> + } >> + >> + bool edge_irreducible = loop_preheader_edge (loop)->flags & >> EDGE_IRREDUCIBLE_LOOP; >> + if (edge_irreducible) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop is not a natural >> loop.\n"); >> + return false; >> + } >> + >> + bool niter_is_unconditional = >> + number_of_iterations_exit (loop, single_exit (loop), &niter_desc, >> false); >> + if (!niter_is_unconditional) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter not >> unconditional.\n"); >> + return false; >> + } >> + >> + if (!niter_desc.control.no_overflow) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter can >> overflow.\n"); >> + return false; >> + } >> + >> + niter = number_of_latch_executions (loop); >> + if (!niter) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] Loop niter unknown.\n"); >> + return 
false; >> + } >> + >> + bool undetermined_coefficients = chrec_contains_undetermined (niter); >> + if (undetermined_coefficients) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] " >> + << "Loop niter chrec contains undetermined >> coefficients.\n"); >> + return false; >> + } >> >> - return !(loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP) >> - && number_of_iterations_exit (loop, single_exit (loop), &niter_desc, >> false) >> - && niter_desc.control.no_overflow >> - && (niter = number_of_latch_executions (loop)) >> - && !chrec_contains_undetermined (niter) >> - && graphite_can_represent_expr (scop, loop, niter); >> + bool can_represent_expr = graphite_can_represent_expr (scop, loop, niter); >> + if (!can_represent_expr) >> + { >> + DEBUG_PRINT (dp << "[can_represent_loop-fail] " >> + << "Loop niter expression cannot be represented: " >> + << niter << "\n"); >> + return false; >> + } >> + >> + return true; >> } >> >> /* Return true when BEGIN is the preheader edge of a loop with a single exit >> @@ -615,13 +662,12 @@ scop_detection::add_scop (sese_l s) >> s.exit = single_succ_edge (s.exit->dest); >> } >> >> - /* Do not add scops with only one loop. */ >> - if (region_has_one_loop (s)) >> - { >> - DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: "; >> - print_sese (dump_file, s)); >> - return; >> - } >> + if (!oacc_function_p (cfun) && region_has_one_loop (s)) >> + { >> + DEBUG_PRINT (dp << "[scop-detection-fail] Discarding one loop SCoP: >> "; >> + print_sese (dump_file, s)); >> + return; >> + } >> >> if (get_exit_bb (s) == EXIT_BLOCK_PTR_FOR_FN (cfun)) >> { >> @@ -805,140 +851,87 @@ scop_detection::remove_intersecting_scops (sese_l s1) >> } >> } >> >> -/* Something like "n * m" is not allowed. */ >> +/* Return true when EXPR can be represented in the polyhedral model. >> + >> + This means an expression can be represented, if it is linear with >> respect to >> + the loops and the strides are non parametric. 
LOOP is the place where >> the >> + expr will be evaluated. SCOP defines the region we analyse. */ >> >> bool >> -scop_detection::graphite_can_represent_init (tree e) >> +scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop, >> + tree expr) >> { >> - switch (TREE_CODE (e)) >> + if (TREE_CODE (expr) == SSA_NAME) >> { >> - case POLYNOMIAL_CHREC: >> - return graphite_can_represent_init (CHREC_LEFT (e)) >> - && graphite_can_represent_init (CHREC_RIGHT (e)); >> - >> - case MULT_EXPR: >> - if (chrec_contains_symbols (TREE_OPERAND (e, 0))) >> - return graphite_can_represent_init (TREE_OPERAND (e, 0)) >> - && tree_fits_shwi_p (TREE_OPERAND (e, 1)); >> - else >> - return graphite_can_represent_init (TREE_OPERAND (e, 1)) >> - && tree_fits_shwi_p (TREE_OPERAND (e, 0)); >> + tree name = SSA_NAME_IDENTIFIER (expr); >> >> - case PLUS_EXPR: >> - case POINTER_PLUS_EXPR: >> - case MINUS_EXPR: >> - return graphite_can_represent_init (TREE_OPERAND (e, 0)) >> - && graphite_can_represent_init (TREE_OPERAND (e, 1)); >> - >> - case NEGATE_EXPR: >> - case BIT_NOT_EXPR: >> - CASE_CONVERT: >> - case NON_LVALUE_EXPR: >> - return graphite_can_represent_init (TREE_OPERAND (e, 0)); >> - >> - default: >> - break; >> + if (name) >> + { >> + const char* id = IDENTIFIER_POINTER (name); >> + if (strncmp (id, ".bound", 6) == 0 >> + || strncmp (id, ".offset", 7) == 0 >> + || strncmp (id, ".chunk_max", 11) == 0 >> + || strncmp (id, ".chunk_no", 10) == 0 >> + || strncmp (id, ".step", 5) == 0) >> + return true; >> + } >> } >> + tree scev = cached_scalar_evolution_in_region (scop, loop, expr); >> + bool can_represent = graphite_can_represent_scev (scop, scev); >> >> - return true; >> -} >> - >> -/* Return true when SCEV can be represented in the polyhedral model. >> - >> - An expression can be represented, if it can be expressed as an >> - affine expression. 
For loops (i, j) and parameters (m, n) all >> - affine expressions are of the form: >> - >> - x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z >> - >> - 1 i + 20 j + (-2) m + 25 >> - >> - Something like "i * n" or "n * m" is not allowed. */ >> - >> -bool >> -scop_detection::graphite_can_represent_scev (sese_l scop, tree scev) >> -{ >> - if (chrec_contains_undetermined (scev)) >> - return false; >> - >> - switch (TREE_CODE (scev)) >> + if (!can_represent) >> { >> - case NEGATE_EXPR: >> - case BIT_NOT_EXPR: >> - CASE_CONVERT: >> - case NON_LVALUE_EXPR: >> - return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)); >> - >> - case PLUS_EXPR: >> - case POINTER_PLUS_EXPR: >> - case MINUS_EXPR: >> - return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) >> - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); >> - >> - case MULT_EXPR: >> - return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0))) >> - && !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 1))) >> - && !(chrec_contains_symbols (TREE_OPERAND (scev, 0)) >> - && chrec_contains_symbols (TREE_OPERAND (scev, 1))) >> - && graphite_can_represent_init (scev) >> - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) >> - && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); >> - >> - case POLYNOMIAL_CHREC: >> - /* Check for constant strides. With a non constant stride of >> - 'n' we would have a value of 'iv * n'. Also check that the >> - initial value can represented: for example 'n * m' cannot be >> - represented. */ >> - gcc_assert (loop_in_sese_p (get_loop (cfun, >> - CHREC_VARIABLE (scev)), scop)); >> - if (!evolution_function_right_is_integer_cst (scev) >> - || !graphite_can_represent_init (scev)) >> - return false; >> - return graphite_can_represent_scev (scop, CHREC_LEFT (scev)); >> - >> - case ADDR_EXPR: >> - /* We cannot encode addresses for ISL. 
*/ >> - return false; >> - >> - default: >> - break; >> + if (dump_file) >> + { >> + fprintf (dump_file, "[graphite_can_represent_expr] Cannot >> represent scev "); >> + print_generic_expr (dump_file, scev, TDF_SLIM); >> + fprintf (dump_file, " of expression "); >> + print_generic_expr (dump_file, expr, TDF_SLIM); >> + fprintf (dump_file, "\n"); >> + } >> } >> - >> - /* Only affine functions can be represented. */ >> - if (tree_contains_chrecs (scev, NULL) || !scev_is_linear_expression >> (scev)) >> - return false; >> - >> - return true; >> + return can_represent; >> } >> >> -/* Return true when EXPR can be represented in the polyhedral model. >> - >> - This means an expression can be represented, if it is linear with >> respect to >> - the loops and the strides are non parametric. LOOP is the place where >> the >> - expr will be evaluated. SCOP defines the region we analyse. */ >> +/* Check if STMT is a internal OpenACC function call that should be >> + ignored when Graphite checks side effects and data references. */ >> >> -bool >> -scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop, >> - tree expr) >> -{ >> - tree scev = cached_scalar_evolution_in_region (scop, loop, expr); >> - return graphite_can_represent_scev (scop, scev); >> +static inline bool >> +oacc_ignore_internal_stmt (gimple *stmt) { >> + return is_gimple_call (stmt) && >> + (gimple_call_internal_p (stmt, IFN_UNIQUE) >> + || gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)); >> } >> >> /* Return true if the data references of STMT can be represented by >> Graphite. >> We try to analyze the data references in a loop contained in the SCOP. 
>> */ >> >> bool >> -scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt) >> +scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt, >> oacc_context *oacc_ctx) >> { >> + if (oacc_ignore_internal_stmt (stmt)) >> + return true; >> + >> edge nest = scop.entry; >> loop_p loop = loop_containing_stmt (stmt); >> if (!loop_in_sese_p (loop, scop)) >> loop = NULL; >> >> + bool allow_non_affine_base = flag_graphite_non_affine_accesses; >> auto_vec<data_reference_p> drs; >> - if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs)) >> - return false; >> + if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs, >> + oacc_ctx, >> allow_non_affine_base)) >> + { >> + DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Unanalyzable >> statement.\n"); >> + return false; >> + } >> + >> + /* This flag means that we allow Graphite to overapproximate the >> + range of data references. Consequently, we do not need to check >> + if Graphite can actually represent the access functions' >> + SCEVs. */ >> + if (flag_graphite_non_affine_accesses) >> + return true; >> >> int j; >> data_reference_p dr; >> @@ -946,7 +939,10 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l >> scop, gimple *stmt) >> { >> for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i) >> if (! 
graphite_can_represent_scev (scop, DR_ACCESS_FN (dr, i))) >> - return false; >> + { >> + DEBUG_PRINT (dp << "[stmt_has_simple_data_refs_p] Cannot >> represent access function SCEV: " << DR_ACCESS_FN (dr, i) << "\n"); >> + return false; >> + } >> } >> >> return true; >> @@ -959,6 +955,9 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l >> scop, gimple *stmt) >> static bool >> stmt_has_side_effects (gimple *stmt) >> { >> + if (oacc_ignore_internal_stmt (stmt)) >> + return false; >> + >> if (gimple_has_volatile_ops (stmt) >> || (gimple_code (stmt) == GIMPLE_CALL >> && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE))) >> @@ -990,11 +989,16 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, >> gimple *stmt, >> if (stmt_has_side_effects (stmt)) >> return false; >> >> - if (!stmt_has_simple_data_refs_p (scop, stmt)) >> + if (!stmt_has_simple_data_refs_p (scop, stmt, oacc_ctx)) >> { >> - DEBUG_PRINT (dp << "[scop-detection-fail] " >> - << "Graphite cannot handle data-refs in stmt:\n"; >> - print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);); >> + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, >> + "[scop-detection-fail] Graphite cannot " >> + "handle data-refs-in-stmt: "); >> + print_gimple_stmt (dump_file, stmt, 0, >> + TDF_VOPS | TDF_MEMSYMS); >> + fprintf (dump_file, "\n"); >> + >> + ); >> return false; >> } >> >> @@ -1027,14 +1031,22 @@ scop_detection::stmt_simple_for_scop_p (sese_l scop, >> gimple *stmt, >> for (unsigned i = 0; i < 2; ++i) >> { >> tree op = gimple_op (stmt, i); >> - if (!graphite_can_represent_expr (scop, loop, op) >> - /* We can only constrain on integer type. */ >> - || ! INTEGRAL_TYPE_P (TREE_TYPE (op))) >> + if (!graphite_can_represent_expr (scop, loop, op)) >> + { >> + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, >> + "[scop-detection-fail] " >> + "Graphite cannot represent >> cond " >> + "stmt operator >> expression.\n")); >> + return false; >> + } >> + >> + if (! 
INTEGRAL_TYPE_P (TREE_TYPE (op))) >> { >> - DEBUG_PRINT (dp << "[scop-detection-fail] " >> - << "Graphite cannot represent stmt:\n"; >> - print_gimple_stmt (dump_file, stmt, 0, >> - TDF_VOPS | TDF_MEMSYMS)); >> + DEBUG_PRINT (dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmt, >> + "[scop-detection-fail] " >> + "Graphite cannot represent >> cond " >> + "statement operator. " >> + "Type must be integral.\n")); >> return false; >> } >> } >> @@ -1151,6 +1163,7 @@ scan_tree_for_params (sese_info_p s, tree e) >> break; >> >> case SSA_NAME: >> + //TODO-kernels Stop treating some OpenACC ifn calls as parameters >> (reductions?) >> assign_parameter_index_in_region (e, s); >> break; >> >> @@ -1288,11 +1301,99 @@ build_cross_bb_scalars_use (scop_p scop, tree use, >> gimple *use_stmt, >> add_read (reads, use, use_stmt); >> } >> >> +/* This class keeps track of the variables that occur in active >> + OpenACC reductions while walking a function's basic blocks during >> + SCoP detection. The UPDATE method processes calls to the OpenACC >> + internal functions which mark the beginning and the end of the use >> + of a reduction variable. It adjusts an internal reference count >> + that is maintained for each such variable accordingly (i.e. number >> + of reductions using a SSA_NAME with the variable name). */ >> + >> +class oacc_reductions { >> + public: >> + bool is_reduction_var (const tree var); >> + void update (const gimple* oacc_reduction_call); >> + void update (const basic_block); >> + private: >> + hash_map<tree, unsigned> reductions; >> +}; >> + >> +/* Check if the DEF is a SSA_NAME for a variable that occurs in an >> + active reduction. 
*/ >> + >> +bool oacc_reductions::is_reduction_var (const tree def) { >> + if (TREE_CODE (def) != SSA_NAME) >> + return false; >> + >> + tree var = SSA_NAME_VAR (def); >> + if (var == NULL_TREE) >> + return false; >> + >> + return reductions.get (var); >> +} >> + >> +/* Update the internal reference count for the variable used by the >> + OACC_REDUCTION_CALL if it starts or ends a reduction. */ >> + >> +void oacc_reductions::update (const gimple* oacc_reduction_call) >> +{ >> + const gcall* call = GIMPLE_CHECK2<const gcall *> (oacc_reduction_call); >> + unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0)); >> + >> + if (!gimple_has_lhs (call)) >> + return; >> + >> + tree var = SSA_NAME_VAR (gimple_call_lhs (call)); >> + >> + if (var == NULL) >> + return; >> + >> + switch (code) >> + { >> + case IFN_GOACC_REDUCTION_SETUP: >> + { >> + unsigned& ref_count = reductions.get_or_insert (var); >> + ref_count++; >> + >> + break; >> + } >> + case IFN_GOACC_REDUCTION_FINI: >> + { >> + unsigned* ref_count = reductions.get (var); >> + gcc_checking_assert (ref_count != NULL && *ref_count > 0); >> + ref_count--; >> + >> + if (ref_count == 0) >> + reductions.remove (var); >> + >> + break; >> + } >> + >> + default: >> + break; >> + } >> +} >> + >> +void oacc_reductions::update (const basic_block bb) >> +{ >> + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); >> + gsi_next (&gsi)) >> + { >> + gimple *stmt = gsi_stmt (gsi); >> + if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION)) >> + update (stmt); >> + } >> +} >> + >> + >> + >> /* Generates a polyhedral black box only if the bb contains interesting >> information. 
*/ >> >> static gimple_poly_bb_p >> -try_generate_gimple_bb (scop_p scop, basic_block bb) >> +try_generate_gimple_bb (scop_p scop, basic_block bb, >> + oacc_reductions& oacc_reductions, >> + __attribute__ ((unused)) oacc_context* oacc_ctx) >> { >> vec<data_reference_p> drs = vNULL; >> vec<tree> writes = vNULL; >> @@ -1304,6 +1405,7 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) >> if (!loop_in_sese_p (loop, region)) >> loop = NULL; >> >> + bool allow_non_affine_base = flag_graphite_non_affine_accesses; >> for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); >> gsi_next (&gsi)) >> { >> @@ -1311,16 +1413,27 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) >> if (is_gimple_debug (stmt)) >> continue; >> >> - graphite_find_data_references_in_stmt (nest, loop, stmt, &drs); >> + graphite_find_data_references_in_stmt (nest, loop, stmt, &drs, >> + oacc_ctx, >> allow_non_affine_base); >> >> tree def = gimple_get_lhs (stmt); >> - if (def) >> + if (def >> + /* When analyzing the outlined function for an OpenACC >> + region, no dependencies on reduction variables should be >> + generated. Those variables must be ignored when deciding >> + if a loop can be parallel. */ >> + && !oacc_reductions.is_reduction_var (def)) >> build_cross_bb_scalars_def (scop, def, gimple_bb (stmt), &writes); >> >> ssa_op_iter iter; >> tree use; >> FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) >> - build_cross_bb_scalars_use (scop, use, stmt, &reads); >> + { >> + if (oacc_reductions.is_reduction_var (use)) >> + continue; >> + >> + build_cross_bb_scalars_use (scop, use, stmt, &reads); >> + } >> } >> >> /* Handle defs and uses in PHIs. 
Those need special treatment given >> @@ -1332,7 +1445,8 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) >> gphi *phi = psi.phi (); >> tree res = gimple_phi_result (phi); >> if (virtual_operand_p (res) >> - || scev_analyzable_p (res, scop->scop_info->region)) >> + || scev_analyzable_p (res, scop->scop_info->region) >> + || oacc_reductions.is_reduction_var (res)) >> continue; >> /* To simulate out-of-SSA the block containing the PHI node has >> reads of the PHI destination. And to preserve SSA dependences >> @@ -1362,13 +1476,15 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) >> continue; >> /* To simulate out-of-SSA the predecessor of edges into PHI >> nodes >> has a copy from the PHI argument to the PHI destination. */ >> - if (! scev_analyzable_p (res, scop->scop_info->region)) >> + if (! scev_analyzable_p (res, scop->scop_info->region) >> + && ! oacc_reductions.is_reduction_var (res)) >> add_write (&writes, res); >> tree use = PHI_ARG_DEF_FROM_EDGE (phi, e); >> if (TREE_CODE (use) == SSA_NAME >> && ! SSA_NAME_IS_DEFAULT_DEF (use) >> && gimple_bb (SSA_NAME_DEF_STMT (use)) != bb_for_succs >> - && ! scev_analyzable_p (use, scop->scop_info->region)) >> + && ! scev_analyzable_p (use, scop->scop_info->region) >> + && ! 
oacc_reductions.is_reduction_var (use)) >> add_read (&reads, use, phi); >> } >> if (e->dest == bb_for_succs->loop_father->latch >> @@ -1402,40 +1518,316 @@ try_generate_gimple_bb (scop_p scop, basic_block bb) >> return new_gimple_poly_bb (bb, drs, reads, writes); >> } >> >> +bool oacc_is_omp_data_use (oacc_context *ctx, data_reference_p dr) >> +{ >> + tree t; >> + >> + switch (TREE_CODE (dr->ref)) >> + { >> + case COMPONENT_REF: >> + { >> + tree referenced = TREE_OPERAND (dr->ref, 0); >> + tree target = TREE_OPERAND (referenced, 0); >> + >> + if (TREE_CODE(target) != SSA_NAME || >> + SSA_NAME_VAR(target) == NULL_TREE) >> + return false; >> + >> + if (SSA_NAME_VAR (target) == ctx->omp_data.tgt_fn_arg) >> + return true; >> + } >> + >> + default: >> + return false; >> + } >> + >> + return false; >> +} >> + >> +/* Aliasing involving the pointers contained in the ".omp_data_i" >> + struct can be safely ignored. We are analysing the behavior of a >> + loop nest with respect to the original function and those pointers >> + are artifacts of the outlining process. */ >> + >> +bool oacc_ignore_alias (oacc_context *ctx, data_reference_p dr1, >> data_reference_p dr2) >> +{ >> + if (oacc_is_omp_data_use (ctx, dr1) >> + || oacc_is_omp_data_use (ctx, dr2)) >> + return true; >> + >> + return false; >> +} >> + >> +/* Returns true if expression EXPR is defined between ENTRY and >> + EXIT. */ >> + >> +static bool >> +def_in_region_p (edge entry, edge exit, tree expr) >> +{ >> + basic_block entry_bb = entry->dest; >> + basic_block exit_bb = exit->dest; >> + basic_block def_bb; >> + >> + if (! 
expr) >> + return false; >> + >> + if (is_gimple_min_invariant (expr)) >> + return false; >> + >> + if (TREE_CODE (expr) == SSA_NAME) >> + { >> + def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr)); >> + if (!def_bb) >> + return false; >> + >> + if (dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb) >> + && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb)) >> + return true; >> + >> + return false; >> + } >> + else { >> + for (unsigned i = 0; i < TREE_OPERAND_LENGTH (expr); i++) >> + if (def_in_region_p (entry, exit, TREE_OPERAND (expr, i))) >> + return true; >> + } >> + >> + return false; >> +} >> + >> +static bool >> +scev_defs_outside_region_p (edge entry, edge exit, tree scev) >> +{ >> + if (chrec_contains_undetermined (scev)) >> + return false; >> + >> + switch (TREE_CODE (scev)) >> + { >> + case NEGATE_EXPR: >> + case BIT_NOT_EXPR: >> + case NON_LVALUE_EXPR: >> + case ADDR_EXPR: >> + return scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, >> 0)); >> + >> + case PLUS_EXPR: >> + case POINTER_PLUS_EXPR: >> + case MINUS_EXPR: >> + case MULT_EXPR: >> + case POLYNOMIAL_CHREC: >> + return scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, >> 0)) >> + && scev_defs_outside_region_p (entry, exit, TREE_OPERAND (scev, 1)); >> + >> + default: >> + break; >> + } >> + >> + return ! def_in_region_p (entry, exit, scev); >> +} >> + >> +/* Checks if all parts of the DRI are defined outside of the region >> + surrounded by the given edges which allows an alias check involving >> + DRI to be placed before the region. 
*/ >> + >> +static bool >> +dr_defs_outside_region (edge entry, edge exit, dr_info *dri) >> +{ >> + data_reference_p dr = dri->dr; >> + tree base = DR_BASE_OBJECT (dr); >> + if (def_in_region_p (entry, exit, base)) >> + { >> + DEBUG_PRINT(dp << "base defined inside SCoP.\n"); >> + return false; >> + } >> + >> + tree step = DR_STEP (dr); >> + if (def_in_region_p (entry, exit, step)) >> + { >> + DEBUG_PRINT(dp << "step defined inside SCoP.\n"); >> + return false; >> + } >> + >> + tree base_addr = DR_BASE_ADDRESS (dr); >> + if (def_in_region_p (entry, exit, base_addr)) >> + { >> + DEBUG_PRINT(dp << "base address defined inside SCoP.\n"); >> + return false; >> + } >> + >> + for (unsigned i = 0; i < DR_NUM_DIMENSIONS(dr); ++i) >> + { >> + tree access = DR_ACCESS_FN (dr, i); >> + if (! scev_defs_outside_region_p (entry, exit, access)) >> + { >> + DEBUG_PRINT(fprintf (dump_file, "%d-th access function uses >> definitions from SCoP.\n", i); >> + print_generic_expr (dump_file, access, dump_flags); >> + fprintf (dump_file, "\n");); >> + return false; >> + } >> + } >> + >> + return true; >> +} >> + >> + >> + >> +/* Return TRUE if a runtime alias check to resolve the aliasing >> + between the DDRs DR1 and DR2 which belong to the LOOP in the region >> + delineated by SCOP_ENTRY and SCOP_EXIT may be created and placed >> + before that region. */ >> + >> +static opt_result >> +graphite_runtime_alias_check_p (dr_info* dr1, dr_info* dr2, class loop >> *loop, >> + edge scop_entry, edge scop_exit) >> +{ >> + gcc_checking_assert (loop); >> + gcc_checking_assert (dr1); >> + gcc_checking_assert (dr2); >> + >> + if (dump_enabled_p ()) >> + dump_printf (MSG_NOTE, >> + "consider run-time aliasing test between %T and %T\n", >> + DR_REF (dr1->dr), DR_REF (dr2->dr)); >> + >> + >> + if (! 
optimize_loop_for_speed_p (loop)) >> + return opt_result::failure_at (DR_STMT (dr1->dr), >> + "runtime alias check not supported when" >> + " optimizing for size.\n"); >> + >> + >> + /* Verify that we have enough information about the data-references >> + and context loop to construct a runtime alias check with >> + "compute_alias_check_pairs". */ >> + >> + if (loop->num != 0) { >> + tree niters = number_of_latch_executions (loop); >> + if (niters == NULL_TREE || niters == chrec_dont_know) >> + return opt_result::failure_at (DR_STMT (dr1->dr), >> + "could not determine number of >> iterations " >> + "of the SCoP's context loop. " >> + "Aborting runtime alias checks.\n"); >> + } >> + >> + /* The runtime alias check selects between the optimized and the >> + original version of a SCoP. Hence, it must be placed before the >> + SCoP which is not possible if some of the data reference's fields >> + refer to definitions inside of the SCoP. */ >> + >> + if (! dr_defs_outside_region (scop_entry, scop_exit, dr1) >> + || ! dr_defs_outside_region (scop_entry, scop_exit, dr2)) >> + return opt_result::failure_at (DR_STMT (dr1->dr), >> + "data-references use definitions inside >> of " >> + "SCoP. " >> + "Aborting runtime alias checks.\n"); >> + >> + >> + return opt_result::success (); >> +} >> + >> /* Compute alias-sets for all data references in DRS. 
*/ >> >> -static bool >> -build_alias_set (scop_p scop) >> +static bool >> +build_alias_set (scop_p scop, oacc_context *oacc_ctx) >> { >> int num_vertices = scop->drs.length (); >> struct graph *g = new_graph (num_vertices); >> dr_info *dr1, *dr2; >> int i, j; >> int *all_vertices; >> + edge scop_entry = scop->scop_info->region.entry; >> + edge scop_exit = scop->scop_info->region.exit; >> >> struct loop *nest >> - = find_common_loop (scop->scop_info->region.entry->dest->loop_father, >> - scop->scop_info->region.exit->src->loop_father); >> + = find_common_loop (scop_entry->dest->loop_father, >> + scop_exit->src->loop_father); >> + >> + gcc_checking_assert (nest); >> + >> + DEBUG_PRINT(dp << "[build_alias_set]: Data references:\n"; >> + dr_info *dr; >> + FOR_EACH_VEC_ELT (scop->drs, i, dr) >> + { >> + dump_data_reference (dump_file, dr->dr); >> + } >> + ); >> + >> + auto_vec<loop_p, 1> nest_vec; >> + if (flag_graphite_runtime_alias_checks) >> + { >> + scop->unhandled_alias_ddrs.create (1); >> + nest_vec.safe_push (nest); >> + } >> >> FOR_EACH_VEC_ELT (scop->drs, i, dr1) >> for (j = i+1; scop->drs.iterate (j, &dr2); j++) >> - if (dr_may_alias_p (dr1->dr, dr2->dr, nest)) >> + if (! (DR_IS_READ (dr1->dr) && DR_IS_READ (dr2->dr)) >> + && dr_may_alias_p (dr1->dr, dr2->dr, nest)) >> { >> - /* Dependences in the same alias set need to be handled >> - by just looking at DR_ACCESS_FNs. */ >> - if (DR_NUM_DIMENSIONS (dr1->dr) == 0 >> - || DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr) >> - || ! operand_equal_p (DR_BASE_OBJECT (dr1->dr), >> - DR_BASE_OBJECT (dr2->dr), >> - OEP_ADDRESS_OF) >> - || ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)), >> - TREE_TYPE (DR_BASE_OBJECT (dr2->dr)))) >> + if (! oacc_ignore_alias (oacc_ctx, dr1->dr, dr2->dr)) >> { >> - free_graph (g); >> - return false; >> + /* Dependences in the same alias set need to be handled >> + by just looking at DR_ACCESS_FNs. 
*/ >> + bool dimension_zero = DR_NUM_DIMENSIONS (dr1->dr) == 0; >> + if (dimension_zero) >> + DEBUG_PRINT(dp << "[build_alias_set] DR1 has dimension >> 0\n"); >> + >> + bool different_dimensions = >> + DR_NUM_DIMENSIONS (dr1->dr) != DR_NUM_DIMENSIONS (dr2->dr); >> + if (different_dimensions) >> + DEBUG_PRINT (dp << "[build_alias_set] " >> + "DRs have different dimensions\n"); >> + >> + bool different_base_objects = >> + ! operand_equal_p (DR_BASE_OBJECT (dr1->dr), >> + DR_BASE_OBJECT (dr2->dr), OEP_ADDRESS_OF); >> + if (different_base_objects) >> + DEBUG_PRINT (dp << "[build_alias_set] " >> + "DRs access different objects\n"); >> + >> + bool incompatible_types = >> + ! types_compatible_p (TREE_TYPE (DR_BASE_OBJECT (dr1->dr)), >> + TREE_TYPE (DR_BASE_OBJECT (dr2->dr))); >> + if (incompatible_types) >> + DEBUG_PRINT (dp << "[build_alias_set] " >> + "DRs with incompatible base object types"); >> + >> + if (dimension_zero || different_dimensions >> + || different_base_objects || incompatible_types) >> + { >> + if (! flag_graphite_runtime_alias_checks) >> + goto FAIL; >> + >> + if (! graphite_runtime_alias_check_p (dr1, dr2, nest, >> + scop_entry, >> scop_exit)) >> + goto FAIL; >> + >> + ddr_p ddr = initialize_data_dependence_relation >> + (dr1->dr, dr2->dr, nest_vec); >> + scop->unhandled_alias_ddrs.safe_push(ddr); >> + } >> } >> add_edge (g, i, j); >> add_edge (g, j, i); >> + continue; >> + >> + FAIL: >> + DEBUG_PRINT (dp << >> + "[build_alias_set] " >> + "Cannot handle dependency between data references: >> \n"; >> + print_gimple_stmt (dump_file, dr1->dr->stmt, 2, >> TDF_DETAILS); >> + print_gimple_stmt (dump_file, dr2->dr->stmt, 2, >> TDF_DETAILS); >> + dp << "\n"); >> + >> + if (flag_graphite_runtime_alias_checks) >> + { >> + if (scop->unhandled_alias_ddrs.length () > 0) >> + /* The loop-nest vec is shared by all DDRs. 
*/ >> + DDR_LOOP_NEST (scop->unhandled_alias_ddrs[0]).release (); >> + free_dependence_relations (scop->unhandled_alias_ddrs); >> + } >> + >> + free_graph (g); >> + return false; >> } >> >> all_vertices = XNEWVEC (int, num_vertices); >> @@ -1457,7 +1849,7 @@ build_alias_set (scop_p scop) >> class gather_bbs : public dom_walker >> { >> public: >> - gather_bbs (cdi_direction, scop_p, int *); >> + gather_bbs (cdi_direction, scop_p, int *, oacc_reductions&, >> oacc_context*); >> >> virtual edge before_dom_children (basic_block); >> virtual void after_dom_children (basic_block); >> @@ -1465,12 +1857,18 @@ public: >> private: >> auto_vec<gimple *, 3> conditions, cases; >> scop_p scop; >> + oacc_reductions &reductions; >> + oacc_context *oacc_ctx; >> }; >> -} >> -gather_bbs::gather_bbs (cdi_direction direction, scop_p scop, int >> *bb_to_rpo) >> - : dom_walker (direction, ALL_BLOCKS, bb_to_rpo), scop (scop) >> -{ >> -} >> + >> +gather_bbs::gather_bbs >> +(cdi_direction direction, scop_p scop, int *bb_to_rpo, >> + oacc_reductions& reductions, oacc_context *oacc_ctx) >> +: dom_walker (direction, ALL_BLOCKS, bb_to_rpo) >> + , scop (scop) >> + , reductions (reductions) >> + , oacc_ctx (oacc_ctx) >> +{} >> >> /* Call-back for dom_walk executed before visiting the dominated >> blocks. 
*/ >> @@ -1478,6 +1876,8 @@ gather_bbs::gather_bbs (cdi_direction direction, >> scop_p scop, int *bb_to_rpo) >> edge >> gather_bbs::before_dom_children (basic_block bb) >> { >> + reductions.update (bb); >> + >> sese_info_p region = scop->scop_info; >> if (!bb_in_sese_p (bb, region->region)) >> return dom_walker::STOP; >> @@ -1514,7 +1914,8 @@ gather_bbs::before_dom_children (basic_block bb) >> >> scop->scop_info->bbs.safe_push (bb); >> >> - gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb); >> + gimple_poly_bb_p gbb = try_generate_gimple_bb (scop, bb, reductions, >> oacc_ctx); >> + >> if (!gbb) >> return NULL; >> >> @@ -1563,6 +1964,44 @@ gather_bbs::after_dom_children (basic_block bb) >> } >> } >> >> +/* Update the OpenACC reductions information for all basic blocks >> + encountered by the dom_walker. This is used to adjust the >> + reduction information for the basic blocks between the SCoPs (which >> + are processed by GATHER_BBS) in the BUILD_SCOPS function. */ >> + >> +class oacc_reduction_walker : public dom_walker >> +{ >> +public: >> + oacc_reduction_walker (oacc_reductions& reductions, edge end, int *); >> + >> + virtual edge before_dom_children (basic_block); >> + >> +private: >> + auto_vec<gimple *, 3> conditions, cases; >> + oacc_reductions& reductions; >> + edge end; >> +}; >> + >> +oacc_reduction_walker::oacc_reduction_walker >> +(oacc_reductions& reductions, edge end, int *bb_to_rpo) >> +: dom_walker (CDI_DOMINATORS, ALL_BLOCKS, bb_to_rpo), >> + reductions (reductions), >> + end (end) >> +{ >> +} >> + >> +edge >> +oacc_reduction_walker::before_dom_children (basic_block bb) >> +{ >> + reductions.update (bb); >> + >> + if (bb == end->src) >> + return dom_walker::STOP; >> + else >> + return NULL; >> +} >> + >> +} >> >> /* Compute sth like an execution order, dominator order with first executing >> edges that stay inside the current loop, delaying processing exit edges. 
>> */ >> @@ -1590,12 +2029,12 @@ cmp_pbbs (const void *pa, const void *pb) >> them to SCOPS. */ >> >> void >> -build_scops (vec<scop_p> *scops) >> +build_scops (vec<scop_p> *scops, oacc_context *oacc_ctx) >> { >> if (dump_file) >> dp.set_dump_file (dump_file); >> >> - scop_detection sb; >> + scop_detection sb (oacc_ctx); >> sb.build_scop_depth (current_loops->tree_root); >> >> /* Now create scops from the lightweight SESEs. */ >> @@ -1611,17 +2050,26 @@ build_scops (vec<scop_p> *scops) >> >> int i; >> sese_l *s; >> + basic_block reduction_walk_start = ENTRY_BLOCK_PTR_FOR_FN (cfun); >> + oacc_reductions reductions; >> + >> FOR_EACH_VEC_ELT (scops_l, i, s) >> { >> scop_p scop = new_scop (s->entry, s->exit); >> >> + edge reduction_walk_end = s->entry; >> + oacc_reduction_walker (reductions, reduction_walk_end, bb_to_rpo) >> + .walk (reduction_walk_start); >> + reduction_walk_start = s->exit->dest; >> + >> /* Record all basic blocks and their conditions in REGION. */ >> - gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo).walk (s->entry->dest); >> + gather_bbs (CDI_DOMINATORS, scop, bb_to_rpo, reductions, oacc_ctx) >> + .walk (s->entry->dest); >> >> /* Sort pbbs after execution order for initial schedule generation. >> */ >> scop->pbbs.qsort (cmp_pbbs); >> >> - if (! build_alias_set (scop)) >> + if (! build_alias_set (scop, oacc_ctx)) >> { >> DEBUG_PRINT (dp << "[scop-detection-fail] cannot handle >> dependences\n"); >> free_scop (scop); >> diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c >> index c42415e0554..c6f07ea9a99 100644 >> --- a/gcc/graphite-sese-to-poly.c >> +++ b/gcc/graphite-sese-to-poly.c >> @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. 
If not see >> #include "gimplify.h" >> #include "gimplify-me.h" >> #include "tree-cfg.h" >> +#include "tree-chrec-oacc.h" >> #include "tree-ssa-loop-manip.h" >> #include "tree-ssa-loop-niter.h" >> #include "tree-ssa-loop.h" >> @@ -209,6 +210,8 @@ parameter_index_in_region (tree name, sese_info_p region) >> return -1; >> } >> >> +tree oacc_ifn_call_extract (gimple*); >> + >> /* Extract an affine expression from the tree E in the scop S. */ >> >> static isl_pw_aff * >> @@ -275,6 +278,13 @@ extract_affine (scop_p s, tree e, __isl_take isl_space >> *space) >> case SSA_NAME: >> { >> gcc_assert (! defined_in_sese_p (e, s->scop_info->region)); >> + if (is_oacc_loop_ifn_call_def (e)) >> + { >> + gimple* stmt = SSA_NAME_DEF_STMT (e); >> + return extract_affine (s, oacc_ifn_call_extract (stmt), space); >> + >> + } >> + >> int dim = parameter_index_in_region (e, s->scop_info); >> gcc_assert (dim != -1); >> /* No need to wrap a parameter. */ >> @@ -643,8 +653,20 @@ build_poly_dr (dr_info &dri) >> subscript_sizes = pdr_add_data_dimensions (subscript_sizes, scop, dr); >> } >> >> - new_poly_dr (pbb, DR_STMT (dr), DR_IS_READ (dr) ? PDR_READ : PDR_WRITE, >> - acc, subscript_sizes); >> + bool representable = true; >> + for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i) >> + if (! graphite_can_represent_scev >> + (scop->scop_info->region, DR_ACCESS_FN (dr, i))) >> + representable = false; >> + >> + /* If non-affine access functions are not enabled, the DR should >> + have been rejected during SCoP detection. */ >> + gcc_checking_assert (representable || flag_graphite_non_affine_accesses); >> + >> + poly_dr_type write_type = representable ? PDR_WRITE : PDR_MAY_WRITE; >> + poly_dr_type type = DR_IS_READ (dr) ? 
PDR_READ : write_type; >> + >> + new_poly_dr (pbb, DR_STMT (dr), type, acc, subscript_sizes); >> } >> >> static void >> diff --git a/gcc/graphite.c b/gcc/graphite.c >> index 27f1e486e1f..3661d92e601 100644 >> --- a/gcc/graphite.c >> +++ b/gcc/graphite.c >> @@ -43,6 +43,8 @@ along with GCC; see the file COPYING3. If not see >> #include "cfghooks.h" >> #include "tree.h" >> #include "gimple.h" >> +#include "gimple-iterator.h" >> +#include "gimplify-me.h" >> #include "ssa.h" >> #include "fold-const.h" >> #include "gimple-iterator.h" >> @@ -58,6 +60,18 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-ssa.h" >> #include "tree-into-ssa.h" >> #include "graphite.h" >> +#include "graphite-oacc.h" >> +#include "cgraph.h" >> +#include "gimple-pretty-print.h" >> +#include "print-tree.h" >> + >> +static bool have_isl = true; >> + >> +#ifdef ACCEL_COMPILER >> +static bool accel_compiler = true; >> +#else >> +static bool accel_compiler = false; >> +#endif >> >> /* Print global statistics to FILE. */ >> >> @@ -348,6 +362,220 @@ canonicalize_loop_closed_ssa (loop_p loop, edge e) >> } >> } >> >> +struct goacc_parallel_info { >> + gcall* call; >> + loop_p loop; >> +}; >> + >> +/* Find the first call to BUILT_IN_GOACC_PARALLEL in the given >> + function where the type of the ".omp_data_arr" argument matches the >> + given type. The type of that argument is sufficient to find the >> + call to the right outlined function, i.e. we don't need to check >> + the function name. Return this call or a NULL_TREE if there is no >> + such call in the function. */ >> + >> +static goacc_parallel_info find_goacc_parallel_call (struct function* fn) >> +{ >> + goacc_parallel_info result; >> + >> + basic_block bb; >> + FOR_EACH_BB_FN (bb, fn) >> + { >> + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); >> + gsi_next (&gsi)) >> + { >> + gimple *call = gsi_stmt (gsi); >> + if (! is_gimple_call (call) >> + || ! 
gimple_call_builtin_p (call, BUILT_IN_NORMAL)) >> + continue; >> + >> + built_in_function code = >> + DECL_FUNCTION_CODE (gimple_call_fndecl (call)); >> + if (code != BUILT_IN_GOACC_PARALLEL) >> + continue; >> + >> + tree called_fn_addr = gimple_call_arg (call, 1); >> + gcc_checking_assert (TREE_CODE (called_fn_addr) == ADDR_EXPR); >> + tree called_fn = TREE_OPERAND (called_fn_addr, 0); >> + >> + if (called_fn != cfun->decl) >> + continue; >> + >> + result.call = (gcall*)call; >> + result.loop = bb->loop_father; >> + } >> + } >> + >> + return result; >> +} >> + >> +static tree >> +get_goacc_parallel_omp_data_arg (gimple* call) >> +{ >> + gcc_checking_assert (is_gimple_call (call) >> + && gimple_call_builtin_p (call, BUILT_IN_NORMAL) >> + && DECL_FUNCTION_CODE (gimple_call_fndecl (call)) >> + == BUILT_IN_GOACC_PARALLEL); >> + tree omp_data_ptr = gimple_call_arg (call, 3); >> + tree omp_data = TREE_OPERAND (omp_data_ptr, 0); >> + >> + return omp_data; >> +} >> + >> +/* TODO-kernels This was meant as a way to allow to peek into the >> + original function from an outlined function to see, for instance, >> + if some values are known to be constant since constant propagation >> + fails to propagate values into the outlined function. Currently >> + unused and can be removed. */ >> + >> +/* Gather direct assignments to STRUCT in FN in MAP. Each field of >> + STRUCT that gets assigned to in FN is mapped to the corresponding >> + rhs of the last encountered assignment. */ >> + >> +void >> +oacc_omp_data::gather_assignments (struct function *fn) >> +{ >> + if (!fn) >> + return; >> + >> + basic_block bb; >> + FOR_EACH_BB_FN (bb, fn) >> + { >> + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); >> + gsi_next (&gsi)) >> + { >> + gimple *assign = gsi_stmt (gsi); >> + >> + if (!
is_gimple_assign (assign)) >> + continue; >> + >> + tree lhs = gimple_assign_lhs (assign); >> + if (TREE_CODE (lhs) != COMPONENT_REF >> + || TREE_OPERAND (lhs, 0) != src_fn_arg) >> + continue; >> + >> + tree field = DECL_NAME (TREE_OPERAND (lhs, 1)); >> + >> + oacc_data_field& value = field_map.get_or_insert (field); >> + value.src_var = gimple_assign_rhs1 (assign); >> + } >> + } >> + >> + unsigned i; >> + tree var; >> + FOR_EACH_VEC_SAFE_ELT (cfun->local_decls, i, var) >> + { >> + oacc_data_field* value = field_map.get (DECL_NAME (var)); >> + if (value) >> + value->tgt_var = var; >> + } >> +} >> + >> +/* TODO-kernels This will have to be adapted to work with the >> +"exploded arguments" patch because the mapping of names from the >> +outlined function to the original function relies on the omp_data >> +arguments to the outlined function which don't exist after "exploding" >> +the arguments. */ >> + >> +oacc_omp_data >> +oacc_omp_data::construct (struct function* fn) >> +{ >> + tree omp_data_arr_arg = NULL_TREE; >> + loop_p call_loop = NULL; >> + >> + if (fn) >> + { >> + goacc_parallel_info info = find_goacc_parallel_call (fn); >> + gcall *parallel_call = info.call; >> + call_loop = info.loop; >> + gcc_checking_assert (parallel_call); >> + gcc_checking_assert (call_loop); >> + >> + omp_data_arr_arg = get_goacc_parallel_omp_data_arg (parallel_call); >> + gcc_checking_assert (omp_data_arr_arg); >> + } >> + tree omp_data_i_arg = DECL_ARGUMENTS (cfun->decl); >> + >> + oacc_omp_data omp_data; >> + omp_data.src_fn_arg = omp_data_arr_arg; >> + omp_data.tgt_fn_arg = omp_data_i_arg; >> + omp_data.loop = call_loop; >> + >> + omp_data.gather_assignments (fn); >> + >> + return omp_data; >> +} >> + >> +/* TODO-kernels How to implement this in a stable way? The name of the >> + original function may change (constprop?) which causes the search >> + to fail. */ >> + >> +/* Return the function from which the OpenACC OUTLINED_FN >> + has been outlined. 
*/ >> + >> +static function* >> +find_oacc_src_fn (function* outlined_fn) >> +{ >> + gcc_assert (! accel_compiler); >> + gcc_assert (oacc_function_p (outlined_fn)); >> + >> + tree name = DECL_ASSEMBLER_NAME (outlined_fn->decl); >> + const char* id = IDENTIFIER_POINTER (name); >> + const unsigned len = IDENTIFIER_LENGTH (name); >> + >> + /* id is the name of the function from which the current >> + function has been outlined, followed by a suffix that starts >> + with ".omp_fn" */ >> + unsigned i = 0; >> + for (; i < len; ++i) >> + if (id[i] == '.') >> + break; >> + >> + gcc_checking_assert (id[i] == '.'); >> + >> + const char* src_id = ggc_alloc_string (id, i); >> + /* fprintf (stderr, "[%s] Looking for source function '%s'\n. ", >> __FUNCTION__, src_id); */ >> + tree id_node = get_identifier (src_id); >> + >> + cgraph_node* n = cgraph_node::get_for_asmname (id_node); >> + >> + return n ? n->get_fun () : NULL; >> + >> + /* cgraph_node* node; */ >> + /* FOR_EACH_FUNCTION (node) */ >> + /* { */ >> + /* tree node_name = DECL_ASSEMBLER_NAME (node->decl); */ >> + /* const char* node_id = IDENTIFIER_POINTER (node_name); */ >> + >> + /* fprintf (stderr, "[%s] Function '%s'\n. ", __FUNCTION__, node_id); >> */ >> + >> + /* if (strcmp (src_id, node_id) == 0) */ >> + /* return node->get_fun (); */ >> + /* } */ >> + /* return NULL; */ >> +} >> + >> +oacc_context >> +oacc_context::build_context () { >> + if (! oacc_function_p (cfun)) >> + return oacc_context::invalid_context (); >> + >> + struct function* src_fn (find_oacc_src_fn (cfun)); >> + if (! 
src_fn) >> + { >> + if (dump_file) >> + fprintf (dump_file, >> + "Source function for outlined function %s not found.\n", >> + IDENTIFIER_POINTER (DECL_NAME (cfun->decl))); >> + /* return oacc_context::invalid_context (); */ >> + } >> + >> + oacc_context context (oacc_omp_data::construct (src_fn)); >> + >> + context.valid = true; >> + return context; >> +} >> + >> /* Converts the current loop closed SSA form to a canonical form >> expected by the Graphite code generation. >> >> @@ -405,6 +633,8 @@ canonicalize_loop_form (void) >> >> isl_ctx *the_isl_ctx; >> >> +extern void oacc_set_arg_evolutions (); >> + >> /* Perform a set of linear transforms on the loops of the current >> function. */ >> >> @@ -417,10 +647,34 @@ graphite_transform_loops (void) >> vec<scop_p> scops = vNULL; >> isl_ctx *ctx; >> >> + bool is_oacc_function = oacc_function_p (cfun); >> + >> + oacc_context oacc_ctx(oacc_context::build_context ()); >> + // TODO-kernels Clean this up >> + /* if (is_oacc_function && ! oacc_ctx.is_valid ()) */ >> + /* { */ >> + /* if (dump_file) */ >> + /* fprintf (dump_file, "Could not build OpenACC context for function >> %s. " */ >> + /* "Aborting Graphite.\n", current_function_name ()); */ >> + /* return; */ >> + /* } */ >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + fprintf (dump_file, "\nTransforming Function: %s\n", >> + current_function_name ()); >> + >> /* If a function is parallel it was most probably already run through >> graphite >> once. No need to run again. */ >> - if (parallelized_function_p (cfun->decl)) >> - return; >> + >> + /* TODO-kernels Stop marking kernels regions that should be >> + processed here as "parallelized". */ >> + if (parallelized_function_p (cfun->decl) && ! 
is_oacc_function) >> + { >> + >> + if (dump_file) >> + fprintf (dump_file, "\nAlready parallelized function.\n"); >> + return; >> + } >> >> calculate_dominance_info (CDI_DOMINATORS); >> >> @@ -445,7 +699,9 @@ graphite_transform_loops (void) >> seir_cache = new hash_map<sese_scev_hash, tree>; >> >> calculate_dominance_info (CDI_POST_DOMINATORS); >> - build_scops (&scops); >> + >> + oacc_set_arg_evolutions (); >> + build_scops (&scops, oacc_ctx.is_valid () ? &oacc_ctx : NULL); >> free_dominance_info (CDI_POST_DOMINATORS); >> >> /* Remove the fake exits before transform given they are not reflected >> @@ -520,6 +776,8 @@ graphite_transform_loops (void) >> >> #else /* If isl is not available: #ifndef HAVE_isl. */ >> >> +static bool have_isl = false; >> + >> static void >> graphite_transform_loops (void) >> { >> @@ -532,7 +790,9 @@ graphite_transform_loops (void) >> static unsigned int >> graphite_transforms (struct function *fun) >> { >> - if (number_of_loops (fun) <= 1) >> + unsigned num_loops = number_of_loops (fun); >> + if (num_loops == 0 >> + || (num_loops == 1 && !oacc_function_p (cfun))) >> return 0; >> >> graphite_transform_loops (); >> @@ -540,9 +800,34 @@ graphite_transforms (struct function *fun) >> return 0; >> } >> >> + >> +/* Return true if fun is an OpenACC outlined function that should be >> +handled by Graphite. */ >> + >> +static inline bool oacc_enable_graphite_p (function *fun) >> +{ >> + return !accel_compiler && flag_openacc && oacc_get_fn_attrib (fun->decl); >> +} >> + >> + >> static bool >> -gate_graphite_transforms (void) >> +gate_graphite_transforms (function *fun) >> { >> + bool oacc_enabled_graphite = oacc_enable_graphite_p (fun); >> + bool optimizing = global_options.x_optimize <= 0; >> + >> + /* Enabling Graphite if isl is not available aborts compilation. >> + Prefer to skip Graphite if compiling without optimisations, >> + but emit a warning. 
*/ >> + if (have_isl || optimizing) >> + flag_graphite_identity |= oacc_enabled_graphite; >> + else { >> + if (oacc_enabled_graphite) >> + warning (OPT_Wall, "Unable to enable Graphite on OpenACC regions," >> + "because isl is not available"); >> + } >> + >> + >> /* Enable -fgraphite pass if any one of the graphite optimization flags >> is turned on. */ >> if (flag_graphite_identity >> @@ -576,7 +861,7 @@ public: >> {} >> >> /* opt_pass methods: */ >> - virtual bool gate (function *) { return gate_graphite_transforms (); } >> + virtual bool gate (function *fun) { return gate_graphite_transforms >> (fun); } >> >> }; // class pass_graphite >> >> @@ -611,7 +896,7 @@ public: >> {} >> >> /* opt_pass methods: */ >> - virtual bool gate (function *) { return gate_graphite_transforms (); } >> + virtual bool gate (function *fun) { return gate_graphite_transforms >> (fun); } >> virtual unsigned int execute (function *fun) { return graphite_transforms >> (fun); } >> >> }; // class pass_graphite_transforms >> @@ -624,4 +909,108 @@ make_pass_graphite_transforms (gcc::context *ctxt) >> return new pass_graphite_transforms (ctxt); >> } >> >> +/* Something like "n * m" is not allowed. 
*/ >> + >> +static bool >> +graphite_can_represent_init (tree e) >> +{ >> + switch (TREE_CODE (e)) >> + { >> + case POLYNOMIAL_CHREC: >> + return graphite_can_represent_init (CHREC_LEFT (e)) >> + && graphite_can_represent_init (CHREC_RIGHT (e)); >> + >> + case MULT_EXPR: >> + if (chrec_contains_symbols (TREE_OPERAND (e, 0))) >> + return graphite_can_represent_init (TREE_OPERAND (e, 0)) >> + && tree_fits_shwi_p (TREE_OPERAND (e, 1)); >> + else >> + return graphite_can_represent_init (TREE_OPERAND (e, 1)) >> + && tree_fits_shwi_p (TREE_OPERAND (e, 0)); >> + >> + case PLUS_EXPR: >> + case POINTER_PLUS_EXPR: >> + case MINUS_EXPR: >> + return graphite_can_represent_init (TREE_OPERAND (e, 0)) >> + && graphite_can_represent_init (TREE_OPERAND (e, 1)); >> + >> + case NEGATE_EXPR: >> + case BIT_NOT_EXPR: >> + CASE_CONVERT: >> + case NON_LVALUE_EXPR: >> + return graphite_can_represent_init (TREE_OPERAND (e, 0)); >> + >> + default: >> + break; >> + } >> + >> + return true; >> +} >> + >> +/* Return true when SCEV can be represented in the polyhedral model. >> + >> + An expression can be represented, if it can be expressed as an >> + affine expression. For loops (i, j) and parameters (m, n) all >> + affine expressions are of the form: >> + >> + x1 * i + x2 * j + x3 * m + x4 * n + x5 * 1 where x1..x5 element of Z >> + >> + 1 i + 20 j + (-2) m + 25 >> + >> + Something like "i * n" or "n * m" is not allowed. 
*/ >> >> +bool >> +graphite_can_represent_scev (sese_l scop, tree scev) >> +{ >> + if (chrec_contains_undetermined (scev)) >> + return false; >> + >> + switch (TREE_CODE (scev)) >> + { >> + case NEGATE_EXPR: >> + case BIT_NOT_EXPR: >> + CASE_CONVERT: >> + case NON_LVALUE_EXPR: >> + return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)); >> + >> + case PLUS_EXPR: >> + case POINTER_PLUS_EXPR: >> + case MINUS_EXPR: >> + return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) >> + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); >> + >> + case MULT_EXPR: >> + return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0))) >> + && !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 1))) >> + && !(chrec_contains_symbols (TREE_OPERAND (scev, 0)) >> + && chrec_contains_symbols (TREE_OPERAND (scev, 1))) >> + && graphite_can_represent_init (scev) >> + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0)) >> + && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1)); >> + >> + case POLYNOMIAL_CHREC: >> + /* Check for constant strides. With a non constant stride of >> + 'n' we would have a value of 'iv * n'. Also check that the >> + initial value can be represented: for example 'n * m' cannot be >> + represented. */ >> + gcc_assert (loop_in_sese_p (get_loop (cfun, >> + CHREC_VARIABLE (scev)), scop)); >> + if (!evolution_function_right_is_integer_cst (scev) >> + || !graphite_can_represent_init (scev)) >> + return false; >> + return graphite_can_represent_scev (scop, CHREC_LEFT (scev)); >> + >> + case ADDR_EXPR: >> + /* We cannot encode addresses for ISL. */ >> + return false; >> + >> + default: >> + break; >> + } >> + >> + /* Only affine functions can be represented.
*/ >> + if (tree_contains_chrecs (scev, NULL) || !scev_is_linear_expression >> (scev)) >> + return false; >> + >> + return true; >> +} >> diff --git a/gcc/graphite.h b/gcc/graphite.h >> index 3fe1345cf96..1e0ccd2de7b 100644 >> --- a/gcc/graphite.h >> +++ b/gcc/graphite.h >> @@ -384,6 +384,10 @@ struct scop >> /* The maximum alias set as assigned to drs by build_alias_sets. */ >> unsigned max_alias_set; >> >> + /* The set of dependent ddrs that were rejected by build_alias_set >> + and that must be handled by other means (runtime checking). */ >> + auto_vec<ddr_p> unhandled_alias_ddrs; >> + >> /* All the basic blocks in this scop that contain memory references >> and that will be represented as statements in the polyhedral >> representation. */ >> @@ -459,10 +463,15 @@ carries_deps (__isl_keep isl_union_map *schedule, >> >> extern bool build_poly_scop (scop_p); >> extern bool graphite_regenerate_ast_isl (scop_p); >> -extern void build_scops (vec<scop_p> *); >> + >> +class oacc_context; >> +extern void build_scops (vec<scop_p> *, oacc_context*); >> + >> extern tree cached_scalar_evolution_in_region (const sese_l &, loop_p, >> tree); >> extern void dot_all_sese (FILE *, vec<sese_l> &); >> extern void dot_sese (sese_l &); >> extern void dot_cfg (); >> >> +extern bool graphite_can_represent_scev (sese_l, tree); >> + >> #endif >> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h >> index 2ec3c504cd6..73c0e7d1880 100644 >> --- a/gcc/internal-fn.h >> +++ b/gcc/internal-fn.h >> @@ -52,10 +52,13 @@ enum ifn_unique_kind { >> >> CHUNK_MAX = LOOP (CODE_CHUNKS, DIR, RANGE, STEP, CHUNK_SIZE, MASK) >> STEP = LOOP (CODE_STEP, DIR, RANGE, STEP, CHUNK_SIZE, MASK) >> - OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, >> CHUNK_NO) >> - BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, OFFSET) >> + OFFSET = LOOP (CODE_OFFSET, DIR, RANGE, STEP, CHUNK_SIZE, MASK, BASE, >> CHUNK_NO) >> + BOUND = LOOP (CODE_BOUND, DIR, RANGE, STEP, CHUNK_SIZE, MASK, END, >> 
OFFSET) >> >> DIR - +1 for up loop, -1 for down loop >> + BASE - Initial value of the loop's iteration variable. >> + END - Last value of the loop's iteration variable +1 or -1, depending >> on the >> + direction of the iteration. >> RANGE - Range of loop (END - BASE) >> STEP - iteration step size >> CHUNKING - size of chunking, (constant zero for no chunking) >> diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c >> index 08afaceb87e..b3d21c1181d 100644 >> --- a/gcc/omp-expand.c >> +++ b/gcc/omp-expand.c >> @@ -6015,8 +6015,8 @@ expand_omp_taskloop_for_inner (struct omp_region >> *region, >> T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV); >> >> <head_bb> [created by splitting end of entry_bb] >> - T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, >> chunk_no); >> - T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset); >> + T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, b, >> chunk_no); >> + T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, e, offset); >> if (!(offset LTGT bound)) goto bottom_bb; >> >> <body_bb> [incoming] >> @@ -6270,20 +6270,22 @@ expand_oacc_for (struct omp_region *region, struct >> omp_for_data *fd) >> /* Loop offset & bound go into head_bb. 
*/ >> gsi = gsi_start_bb (head_bb); >> >> - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, >> + tree begin = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n1), >> true, NULL_TREE, true, GSI_SAME_STMT); >> + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, >> build_int_cst (integer_type_node, >> IFN_GOACC_LOOP_OFFSET), >> dir, range, s, >> - chunk_size, gwv, chunk_no); >> + chunk_size, gwv, begin, chunk_no); >> gimple_call_set_lhs (call, offset_init); >> gimple_set_location (call, loc); >> gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); >> >> - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, >> + tree end = force_gimple_operand_gsi (&gsi, unshare_expr (fd->loop.n2), >> true, NULL_TREE, true, GSI_SAME_STMT); >> + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, >> build_int_cst (integer_type_node, >> IFN_GOACC_LOOP_BOUND), >> dir, range, s, >> - chunk_size, gwv, offset_init); >> + chunk_size, gwv, end, offset_init); >> gimple_call_set_lhs (call, bound); >> gimple_set_location (call, loc); >> gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING); >> @@ -6332,16 +6334,20 @@ expand_oacc_for (struct omp_region *region, struct >> omp_for_data *fd) >> tree t, e_gwv = integer_minus_one_node; >> tree chunk = build_int_cst (diff_type, 0); /* Never chunked. 
*/ >> >> + tree begin = force_gimple_operand_gsi (&gsi, unshare_expr >> (fd->loop.n1), >> + true, NULL_TREE, true, >> GSI_SAME_STMT); >> t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET); >> - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, >> e_range, >> - element_s, chunk, e_gwv, chunk); >> + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, >> e_range, >> + element_s, chunk, e_gwv, begin, >> chunk); >> gimple_call_set_lhs (call, e_offset); >> gimple_set_location (call, loc); >> gsi_insert_before (&gsi, call, GSI_SAME_STMT); >> >> + tree end = force_gimple_operand_gsi (&gsi, unshare_expr >> (fd->loop.n2), >> + true, NULL_TREE, true, >> GSI_SAME_STMT); >> t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND); >> - call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, >> e_range, >> - element_s, chunk, e_gwv, >> e_offset); >> + call = gimple_build_call_internal (IFN_GOACC_LOOP, 8, t, dir, >> e_range, >> + element_s, chunk, e_gwv, end, >> e_offset); >> gimple_call_set_lhs (call, e_bound); >> gimple_set_location (call, loc); >> gsi_insert_before (&gsi, call, GSI_SAME_STMT); >> diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c >> index a73b73fb41a..3e81e878cf9 100644 >> --- a/gcc/omp-offload.c >> +++ b/gcc/omp-offload.c >> @@ -83,6 +83,8 @@ struct oacc_loop >> vec<gcall *> ifns; /* Contained loop abstraction functions. */ >> tree chunk_size; /* Chunk size. */ >> gcall *head_end; /* Final marker of head sequence. */ >> + >> + bool can_be_parallel; /* Can the loop be parallelized? */ >> }; >> >> /* Holds offload tables with decls. 
*/ >> @@ -657,7 +659,7 @@ oacc_xform_loop (gcall *call) >> >> if (chunking) >> { >> - tree chunk = fold_convert (diff_type, gimple_call_arg (call, >> 6)); >> + tree chunk = fold_convert (diff_type, gimple_call_arg (call, >> 7)); >> tree per >> = fold_build2 (MULT_EXPR, diff_type, volume, chunk_size); >> per = build2 (MULT_EXPR, diff_type, per, chunk); >> @@ -697,7 +699,7 @@ oacc_xform_loop (gcall *call) >> >> r = fold_build2 (MULT_EXPR, diff_type, span, step); >> >> - tree offset = gimple_call_arg (call, 6); >> + tree offset = gimple_call_arg (call, 7); >> r = build2 (PLUS_EXPR, diff_type, r, >> fold_convert (diff_type, offset)); >> r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR, >> @@ -911,7 +913,8 @@ oacc_validate_dims (tree fn, tree attrs, int *dims, int >> level, unsigned used) >> check = false; >> #endif >> if (check >> - && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn))) >> + && !lookup_attribute ("oacc kernels", DECL_ATTRIBUTES (fn)) >> + && !lookup_attribute ("oacc parallel_kernels_graphite", >> DECL_ATTRIBUTES (fn))) >> { >> static char const *const axes[] = >> /* Must be kept in sync with GOMP_DIM enumeration. */ >> @@ -1015,6 +1018,11 @@ new_oacc_loop (oacc_loop *parent, gcall *marker) >> flags. */ >> >> loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3)); >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + dump_printf_loc (MSG_NOTE, >> + dump_user_location_t::from_location_t (loop->loc), >> + "[new_oacc_loop] Found loop %s 'auto' directive.\n", >> + loop->flags & OLF_AUTO ? 
"with" : "without"); >> >> tree chunk_size = integer_zero_node; >> if (loop->flags & OLF_GANG_STATIC) >> @@ -1217,45 +1225,62 @@ oacc_loop_discover_walk (oacc_loop *loop, >> basic_block bb) >> break; >> >> case IFN_UNIQUE: >> - enum ifn_unique_kind kind >> - = (enum ifn_unique_kind) (TREE_INT_CST_LOW >> - (gimple_call_arg (call, 0))); >> - if (kind == IFN_UNIQUE_OACC_HEAD_MARK >> - || kind == IFN_UNIQUE_OACC_TAIL_MARK) >> - { >> - if (gimple_call_num_args (call) == 2) >> - { >> - gcc_assert (marker && !remaining); >> - marker = 0; >> - if (kind == IFN_UNIQUE_OACC_TAIL_MARK) >> - loop = finish_oacc_loop (loop); >> - else >> - loop->head_end = call; >> - } >> - else >> - { >> - int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); >> + { >> + enum ifn_unique_kind kind >> + = (enum ifn_unique_kind) (TREE_INT_CST_LOW >> + (gimple_call_arg (call, 0))); >> + if (kind == IFN_UNIQUE_OACC_HEAD_MARK >> + || kind == IFN_UNIQUE_OACC_TAIL_MARK) >> + { >> + if (gimple_call_num_args (call) == 2) >> + { >> + gcc_assert (marker && !remaining); >> + marker = 0; >> + if (kind == IFN_UNIQUE_OACC_TAIL_MARK) >> + loop = finish_oacc_loop (loop); >> + else >> + loop->head_end = call; >> + } >> + else >> + { >> + int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2)); >> >> - if (!marker) >> - { >> - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) >> - loop = new_oacc_loop (loop, call); >> - remaining = count; >> - } >> - gcc_assert (count == remaining); >> - if (remaining) >> - { >> - remaining--; >> - if (kind == IFN_UNIQUE_OACC_HEAD_MARK) >> - loop->heads[marker] = call; >> - else >> - loop->tails[remaining] = call; >> - } >> - marker++; >> - } >> - } >> + if (!marker) >> + { >> + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) >> + loop = new_oacc_loop (loop, call); >> + >> + remaining = count; >> + } >> + gcc_assert (count == remaining); >> + if (remaining) >> + { >> + remaining--; >> + if (kind == IFN_UNIQUE_OACC_HEAD_MARK) >> + loop->heads[marker] = call; >> + else >> + loop->tails[remaining] = 
call; >> + } >> + marker++; >> + } >> + } >> + break; >> + } >> + >> + case IFN_GOACC_REDUCTION: >> + break; >> } >> } >> + >> + if (bb->loop_father->can_be_parallel) >> + { >> + loop->can_be_parallel = true; >> + const dump_user_location_t loc >> + = dump_user_location_t::from_location_t (loop->loc); >> + if (dump_file) >> + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Detected >> parallelizable loop."); >> + } >> + >> if (remaining || marker) >> { >> bb = single_succ (bb); >> @@ -1411,12 +1436,61 @@ oacc_loop_process (oacc_loop *loop) >> oacc_loop_process (loop->sibling); >> } >> >> -/* Walk the OpenACC loop heirarchy checking and assigning the >> +/* Interpret the "can_be_parallel" flag of the LOOP to decide >> + if it can be made "independent" */ >> + >> +static bool oacc_loop_parallelize (oacc_loop *loop) { >> + if (loop->routine) >> + return false; >> + >> + if (!(loop->flags & OLF_AUTO)) >> + { >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + dump_printf_loc (MSG_NOTE, >> + dump_user_location_t::from_location_t (loop->loc), >> + "[oacc_loop_parallelize] Not an 'auto' loop.\n"); >> + >> + >> + return false; >> + } >> + >> + if (!loop->can_be_parallel) >> + { >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, >> + dump_user_location_t::from_location_t (loop->loc), >> + "'auto' loop cannot be parallel.\n"); >> + return false; >> + } >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, >> + dump_user_location_t::from_location_t (loop->loc), >> + "'auto' loop can be parallel.\n"); >> + >> + loop->flags |= OLF_INDEPENDENT; >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + fprintf (dump_file, "[oacc_loop_parallelize] %s:%u Transformed 'auto' >> into 'independent'.\n", >> + LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc)); >> + >> + // loop->flags &= ~OLF_AUTO; >> + /* TODO-kernels Right now we need to keep the OLF_AUTO flag for >> + further 
processing in oacc_loop_fixed_partitions and >> + oacc_loop_auto_partitions. We should remove it here and use >> + another flag to indicate that the partitioning must be >> + assigned. */ >> + >> + return true; >> +} >> + >> +/* Walk the OpenACC loop hierarchy checking and assigning the >> programmer-specified partitionings. OUTER_MASK is the partitioning >> this loop is contained within. Return mask of partitioning >> encountered. If any auto loops are discovered, set GOMP_DIM_MAX >> bit. */ >> >> + >> static unsigned >> oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) >> { >> @@ -1446,14 +1520,18 @@ oacc_loop_fixed_partitions (oacc_loop *loop, >> unsigned outer_mask) >> >> if ((this_mask != 0) + auto_par + seq_par > 1) >> { >> - if (noisy) >> - error_at (loop->loc, >> - seq_par >> - ? G_("%<seq%> overrides other OpenACC loop specifiers") >> - : G_("%<auto%> conflicts with other OpenACC loop " >> - "specifiers")); >> + if (seq_par && noisy) >> + error_at (loop->loc, G_("%<seq%> overrides other OpenACC loop >> specifiers")); >> maybe_auto = false; >> + >> + if (dump_file && (dump_flags & TDF_DETAILS)) >> + dump_printf_loc (MSG_NOTE, >> + dump_user_location_t::from_location_t >> (loop->loc), >> + "[oacc_loop_fixed_partitions] Removed >> 'auto'.\n"); >> + >> + >> loop->flags &= ~OLF_AUTO; >> + >> if (seq_par) >> { >> loop->flags >> @@ -1467,6 +1545,9 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned >> outer_mask) >> loop->flags |= OLF_AUTO; >> mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); >> } >> + >> + if (oacc_loop_parallelize (loop)) >> + mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX); >> } >> >> if (this_mask & outer_mask) >> diff --git a/gcc/predict.c b/gcc/predict.c >> index a7ae977c866..c44aac58f28 100644 >> --- a/gcc/predict.c >> +++ b/gcc/predict.c >> @@ -4035,7 +4035,7 @@ pass_profile::execute (function *fun) >> class loop *loop; >> FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) >> if (loop->header->count.initialized_p ()) >> - fprintf (dump_file, 
"Loop got predicted %d to iterate %i times.\n", >> + fprintf (dump_file, "Loop %d got predicted to iterate %i times.\n", >> loop->num, >> (int)expected_loop_iterations_unbounded (loop)); >> } >> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c >> b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c >> new file mode 100644 >> index 00000000000..7228fb09818 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-no-runtime-check.c >> @@ -0,0 +1,20 @@ >> +/* This test demonstrates a loop nest that Graphite cannot handle >> + because of aliasing. But the loop nest can be handled with enabled >> + runtime alias checking. */ >> + >> +/* { dg-options "-O2 -fgraphite-identity -fno-graphite-runtime-alias-checks >> -fdump-tree-graphite-details" } */ >> + >> +void sum(int *x, int *y, unsigned *sum) >> +{ >> + unsigned i,j; >> + *sum = 0; >> + >> + for (i = 0; i < 10000; i=i+1) >> + { >> + int xi = x[i]; >> + for (j = 0; j < 22222; j=j+1) >> + *sum += xi + y[j]; >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump "number of SCoPs: 0" "graphite"} } */ >> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c >> b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c >> new file mode 100644 >> index 00000000000..a9f9ef99908 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/graphite/alias-0-runtime-check.c >> @@ -0,0 +1,21 @@ >> +/* This test demonstrates a loop nest that Graphite cannot handle >> + because of aliasing. But the loop nest can be handled with enabled >> + runtime alias checking. 
*/ >> + >> +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks >> -fdump-tree-graphite-details" } */ >> + >> +void sum(int *x, int *y, unsigned *sum) >> +{ >> + unsigned i,j; >> + *sum = 0; >> + >> + for (i = 0; i < 10000; i=i+1) >> + { >> + int xi = x[i]; >> + for (j = 0; j < 22222; j=j+1) >> + *sum += xi + y[j]; >> + } >> +} >> + >> +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite"} } */ >> +/* { dg-final { scan-tree-dump "Generated runtime alias >> check.*?sum_.*?x_.*?y_.*?\n" "graphite"} } */ >> diff --git a/gcc/testsuite/gcc.dg/graphite/alias-1.c >> b/gcc/testsuite/gcc.dg/graphite/alias-1.c >> new file mode 100644 >> index 00000000000..ee80dae1df3 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/graphite/alias-1.c >> @@ -0,0 +1,22 @@ >> +/* This test demonstrates a loop nest that Graphite cannot handle >> + because of aliasing. It should be possible to handle this loop nest >> + by creating a runtime alias check like in the very similar test >> + alias-0-runtime-check.c. However Graphite analyses the data >> + reference with respect to the innermost loop that contains the data >> + reference, the variable "i" remains uninstantiated (in contrast to >> + "j"), and consequently the alias check cannot be placed outside of >> + the SCoP since "i" is not defined there. */ >> + >> +/* { dg-options "-O2 -fgraphite-identity -fgraphite-runtime-alias-checks >> -fdump-tree-graphite-details" } */ >> + >> +void sum(int *x, int *y, unsigned *sum) >> +{ >> + unsigned i,j; >> + *sum = 0; >> + >> + for (i = 0; i < 10000; i=i+1) >> + for (j = 0; j < 22222; j=j+1) >> + *sum += x[i] + y[j]; >> +} >> + >> +/* { dg-final { scan-tree-dump "number of SCoPs: 1" "graphite" { xfail >> *-*-* } } } */ >> diff --git a/gcc/tree-chrec-oacc.h b/gcc/tree-chrec-oacc.h >> new file mode 100644 >> index 00000000000..bcbb1e03657 >> --- /dev/null >> +++ b/gcc/tree-chrec-oacc.h >> @@ -0,0 +1,45 @@ >> +/* OpenACC helpers for Chains of recurrences. 
>> + Copyright (C) 2003-2020 Free Software Foundation, Inc. >> + >> +This file is part of GCC. >> + >> +GCC is free software; you can redistribute it and/or modify it under >> +the terms of the GNU General Public License as published by the Free >> +Software Foundation; either version 3, or (at your option) any later >> +version. >> + >> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY >> +WARRANTY; without even the implied warranty of MERCHANTABILITY or >> +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License >> +for more details. >> + >> +You should have received a copy of the GNU General Public License >> +along with GCC; see the file COPYING3. If not see >> +<http://www.gnu.org/licenses/>. */ >> + >> +#ifndef GCC_TREE_CHREC_OACC_H >> +#define GCC_TREE_CHREC_OACC_H >> + >> +#include <gimple.h> >> +#include <internal-fn.h> >> + >> +/* Check if the tree is an SSA_NAME whose defining statement >> + is a call to an IFN_GOACC_LOOP function. */ >> +static inline bool >> +is_oacc_loop_ifn_call_def (tree t) { >> + tree_code code = TREE_CODE (t); >> + return (code == SSA_NAME >> + && gimple_call_internal_p (SSA_NAME_DEF_STMT (t), IFN_GOACC_LOOP)); >> +} >> + >> +/* Check if the tree is an SSA_NAME whose defining statement >> + is a call to an IFN_GOACC_LOOP or IFN_GOACC_REDUCTION function. */ >> +static inline bool >> +is_oacc_ifn_call_def (tree t) { >> + if (TREE_CODE (t) != SSA_NAME) >> + return false; >> + >> + return (gimple_call_internal_p (SSA_NAME_DEF_STMT (t), IFN_GOACC_LOOP) >> + || gimple_call_internal_p (SSA_NAME_DEF_STMT (t), >> IFN_GOACC_REDUCTION)); >> +} >> +#endif /* GCC_TREE_CHREC_OACC_H */ >> diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c >> index a8848067040..f536d6001ce 100644 >> --- a/gcc/tree-chrec.c >> +++ b/gcc/tree-chrec.c >> @@ -1744,8 +1744,17 @@ scev_is_linear_expression (tree scev) >> } >> } >> >> -/* Determines whether the expression CHREC contains only interger consts >> - in the right parts. 
*/ >> +static bool >> +is_oacc_loop_call (tree chrec) { >> + return TREE_CODE (chrec) == SSA_NAME >> + && gimple_call_internal_p (SSA_NAME_DEF_STMT (chrec), >> + IFN_GOACC_LOOP); >> + >> +} >> + >> +/* Determines whether the expression CHREC contains only integer >> + consts in the right parts. OpenACC internal function calls >> + which encode integer constants are also admitted. */ >> >> bool >> evolution_function_right_is_integer_cst (const_tree chrec) >> @@ -1759,7 +1768,8 @@ evolution_function_right_is_integer_cst (const_tree >> chrec) >> return true; >> >> case POLYNOMIAL_CHREC: >> - return TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST >> + return (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST >> + || is_oacc_loop_call (CHREC_RIGHT (chrec))) >> && (TREE_CODE (CHREC_LEFT (chrec)) != POLYNOMIAL_CHREC >> || evolution_function_right_is_integer_cst (CHREC_LEFT (chrec))); >> >> diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c >> index 5505ba46778..9094b2ac45d 100644 >> --- a/gcc/tree-data-ref.c >> +++ b/gcc/tree-data-ref.c >> @@ -85,6 +85,7 @@ along with GCC; see the file COPYING3. If not see >> #include "fold-const.h" >> #include "expr.h" >> #include "gimple-iterator.h" >> +#include "tree-chrec-oacc.h" >> #include "tree-ssa-loop-niter.h" >> #include "tree-ssa-loop.h" >> #include "tree-ssa.h" >> @@ -97,6 +98,8 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-eh.h" >> #include "ssa.h" >> #include "internal-fn.h" >> +#include "print-tree.h" >> +#include "graphite-oacc.h" >> >> static struct datadep_stats >> { >> @@ -884,18 +887,23 @@ canonicalize_base_object_address (tree addr) >> dummy outermost loop. In other cases perform loop analysis. >> >> Return true if the analysis succeeded and store the results in DRB if so. >> - BB analysis can only fail for bitfield or reversed-storage accesses. */ >> + BB analysis can only fail for bitfield or reversed-storage accesses. 
>> + >> + If ALLOW_NON_AFFINE_BASE is true, the function will not return false if >> + the base is non-affine. */ >> >> opt_result >> dr_analyze_innermost (innermost_loop_behavior *drb, tree ref, >> - class loop *loop, const gimple *stmt) >> + class loop *loop, const gimple *stmt, >> + // TODO-kernels Rename (also allows non affine offset) >> + bool allow_non_affine_base) >> { >> poly_int64 pbitsize, pbitpos; >> tree base, poffset; >> machine_mode pmode; >> int punsignedp, preversep, pvolatilep; >> affine_iv base_iv, offset_iv; >> - tree init, dinit, step; >> + tree init, dinit; >> bool in_loop = (loop && loop->num); >> >> if (dump_file && (dump_flags & TDF_DETAILS)) >> @@ -945,17 +953,20 @@ dr_analyze_innermost (innermost_loop_behavior *drb, >> tree ref, >> else >> base = build_fold_addr_expr (base); >> >> + bool affine_base = true; >> if (in_loop) >> { >> - if (!simple_iv (loop, loop, base, &base_iv, true)) >> + affine_base = simple_iv (loop, loop, base, &base_iv, true); >> + if (!affine_base && !allow_non_affine_base) >> return opt_result::failure_at >> (stmt, "failed: evolution of base is not affine.\n"); >> } >> - else >> + >> + if (!in_loop || !affine_base) >> { >> base_iv.base = base; >> base_iv.step = ssize_int (0); >> - base_iv.no_overflow = true; >> + base_iv.no_overflow = affine_base ? false : true; >> } >> >> if (!poffset) >> @@ -965,14 +976,18 @@ dr_analyze_innermost (innermost_loop_behavior *drb, >> tree ref, >> } >> else >> { >> - if (!in_loop) >> - { >> - offset_iv.base = poffset; >> - offset_iv.step = ssize_int (0); >> - } >> - else if (!simple_iv (loop, loop, poffset, &offset_iv, true)) >> - return opt_result::failure_at >> - (stmt, "failed: evolution of offset is not affine.\n"); >> + offset_iv.base = poffset; >> + offset_iv.step = ssize_int (0); >> + >> + if (in_loop && ! 
simple_iv (loop, loop, poffset, &offset_iv, true)) { >> + if (!allow_non_affine_base) >> + return opt_result::failure_at >> + (stmt, "failed: evolution of offset is not affine.\n"); >> + offset_iv.base = poffset; >> + offset_iv.step = ssize_int (0); >> + >> + >> + } >> } >> >> init = ssize_int (pbytepos); >> @@ -983,14 +998,8 @@ dr_analyze_innermost (innermost_loop_behavior *drb, >> tree ref, >> init = size_binop (PLUS_EXPR, init, dinit); >> base_misalignment -= TREE_INT_CST_LOW (dinit); >> >> - split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); >> - init = size_binop (PLUS_EXPR, init, dinit); >> - >> - step = size_binop (PLUS_EXPR, >> - fold_convert (ssizetype, base_iv.step), >> - fold_convert (ssizetype, offset_iv.step)); >> - >> base = canonicalize_base_object_address (base_iv.base); >> + split_constant_offset (offset_iv.base, &offset_iv.base, &dinit); >> >> /* See if get_pointer_alignment can guarantee a higher alignment than >> the one we calculated above. */ >> @@ -1013,7 +1022,10 @@ dr_analyze_innermost (innermost_loop_behavior *drb, >> tree ref, >> drb->base_address = base; >> drb->offset = fold_convert (ssizetype, offset_iv.base); >> drb->init = init; >> - drb->step = step; >> + drb->step = size_binop (PLUS_EXPR, >> + fold_convert (ssizetype, base_iv.step), >> + fold_convert (ssizetype, offset_iv.step)); >> + >> if (known_misalignment (base_misalignment, base_alignment, >> &drb->base_misalignment)) >> drb->base_alignment = base_alignment; >> @@ -1023,7 +1035,7 @@ dr_analyze_innermost (innermost_loop_behavior *drb, >> tree ref, >> drb->base_misalignment = 0; >> } >> drb->offset_alignment = highest_pow2_factor (offset_iv.base); >> - drb->step_alignment = highest_pow2_factor (step); >> + drb->step_alignment = highest_pow2_factor (drb->step); >> >> if (dump_file && (dump_flags & TDF_DETAILS)) >> fprintf (dump_file, "success.\n"); >> @@ -1096,7 +1108,9 @@ dr_analyze_indices (struct data_reference *dr, edge >> nest, loop_p loop) >> { >> op = 
TREE_OPERAND (ref, 1); >> access_fn = analyze_scalar_evolution (loop, op); >> - access_fn = instantiate_scev (nest, loop, access_fn); >> + tree instantiated_fn = instantiate_scev (nest, loop, access_fn); >> + if (instantiated_fn) >> + access_fn = instantiated_fn; >> access_fns.safe_push (access_fn); >> } >> else if (TREE_CODE (ref) == COMPONENT_REF >> @@ -1128,7 +1142,9 @@ dr_analyze_indices (struct data_reference *dr, edge >> nest, loop_p loop) >> { >> op = TREE_OPERAND (ref, 0); >> access_fn = analyze_scalar_evolution (loop, op); >> - access_fn = instantiate_scev (nest, loop, access_fn); >> + tree instantiated_fn = instantiate_scev (nest, loop, access_fn); >> + if (instantiated_fn) >> + access_fn = instantiated_fn; >> if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC) >> { >> tree orig_type; >> @@ -1230,11 +1246,15 @@ free_data_ref (data_reference_p dr) >> >> Return the data_reference description of MEMREF. NEST is the outermost >> loop in which the reference should be instantiated, LOOP is the loop >> - in which the data reference should be analyzed. */ >> + in which the data reference should be analyzed. >> + >> + If ALLOW_NON_AFFINE_BASE is true, the function will not fail if the >> + base is non-affine. */ >> >> struct data_reference * >> create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, >> - bool is_read, bool is_conditional_in_stmt) >> + bool is_read, bool is_conditional_in_stmt, >> + bool allow_non_affine_base) >> { >> struct data_reference *dr; >> >> @@ -1252,7 +1272,8 @@ create_data_ref (edge nest, loop_p loop, tree memref, >> gimple *stmt, >> DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt; >> >> dr_analyze_innermost (&DR_INNERMOST (dr), memref, >> - nest != NULL ? loop : NULL, stmt); >> + nest != NULL ? 
loop : NULL, stmt, >> + allow_non_affine_base); >> dr_analyze_indices (dr, nest, loop); >> dr_analyze_alias (dr); >> >> @@ -5422,6 +5443,7 @@ struct data_ref_loc >> bool is_conditional_in_stmt; >> }; >> >> +tree oacc_ifn_call_extract (gimple *call); >> >> /* Stores the locations of memory references in STMT to REFERENCES. Returns >> true if STMT clobbers memory, false otherwise. */ >> @@ -5444,6 +5466,10 @@ get_references_in_stmt (gimple *stmt, >> vec<data_ref_loc, va_heap> *references) >> if (gimple_call_internal_p (stmt)) >> switch (gimple_call_internal_fn (stmt)) >> { >> + case IFN_GOACC_REDUCTION: >> + case IFN_UNIQUE: >> + case IFN_GOACC_LOOP: >> + break; >> case IFN_GOMP_SIMD_LANE: >> { >> class loop *loop = gimple_bb (stmt)->loop_father; >> @@ -5519,6 +5545,25 @@ get_references_in_stmt (gimple *stmt, >> vec<data_ref_loc, va_heap> *references) >> ptr); >> references->safe_push (ref); >> return false; >> + case IFN_GOACC_LOOP: >> + /* Treat this like a reference to the data from the >> + original loop (offset, bound etc.) that has been >> + replaced by the internal function call in >> + omp-expand.c. */ >> + >> + op0 = gimple_call_lhs (stmt); >> + op1 = oacc_ifn_call_extract (stmt); >> + >> + if (DECL_P (op1) >> + || (REFERENCE_CLASS_P (op1) && get_base_address (op1))) >> + { >> + ref.ref = op1; >> + ref.is_read = true; >> + ref.is_conditional_in_stmt = false; >> + references->safe_push (ref); >> + } >> + return false; >> + >> default: >> break; >> } >> @@ -5616,11 +5661,15 @@ find_data_references_in_stmt (class loop *nest, >> gimple *stmt, >> unanalyzable reference, returns false, otherwise returns true. >> NEST is the outermost loop of the loop nest in which the references >> should be instantiated, LOOP is the loop in which the references >> - should be analyzed. */ >> + should be analyzed. >> + If ALLOW_NON_AFFINE_BASE is true, the data references are allowed >> + to have a non-affine base. 
*/ >> >> bool >> graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt, >> - vec<data_reference_p> *datarefs) >> + vec<data_reference_p> *datarefs, >> + oacc_context *oacc_ctx, >> + bool allow_non_affine_base) >> { >> unsigned i; >> auto_vec<data_ref_loc, 2> references; >> @@ -5634,7 +5683,8 @@ graphite_find_data_references_in_stmt (edge nest, >> loop_p loop, gimple *stmt, >> FOR_EACH_VEC_ELT (references, i, ref) >> { >> dr = create_data_ref (nest, loop, ref->ref, stmt, ref->is_read, >> - ref->is_conditional_in_stmt); >> + ref->is_conditional_in_stmt, >> + allow_non_affine_base); >> gcc_assert (dr != NULL); >> datarefs->safe_push (dr); >> } >> diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h >> index 771d20fbbc3..2d82e0ad923 100644 >> --- a/gcc/tree-data-ref.h >> +++ b/gcc/tree-data-ref.h >> @@ -515,7 +515,7 @@ typedef struct data_dependence_relation *ddr_p; >> >> >> opt_result dr_analyze_innermost (innermost_loop_behavior *, tree, >> - class loop *, const gimple *); >> + class loop *, const gimple *, bool = false); >> extern bool compute_data_dependences_for_loop (class loop *, bool, >> vec<loop_p> *, >> vec<data_reference_p> *, >> @@ -539,12 +539,14 @@ extern void free_data_ref (data_reference_p); >> extern void free_data_refs (vec<data_reference_p> ); >> extern opt_result find_data_references_in_stmt (class loop *, gimple *, >> vec<data_reference_p> *); >> +class oacc_context; >> extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *, >> - vec<data_reference_p> *); >> + vec<data_reference_p> *, >> + oacc_context *, bool); >> tree find_data_references_in_loop (class loop *, vec<data_reference_p> *); >> bool loop_nest_has_data_refs (loop_p loop); >> struct data_reference *create_data_ref (edge, loop_p, tree, gimple *, bool, >> - bool); >> + bool, bool = false); >> extern bool find_loop_nest (class loop *, vec<loop_p> *); >> extern struct data_dependence_relation *initialize_data_dependence_relation >> (struct 
data_reference *, struct data_reference *, vec<loop_p>); >> diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c >> index 888af48946f..1ac27569a03 100644 >> --- a/gcc/tree-loop-distribution.c >> +++ b/gcc/tree-loop-distribution.c >> @@ -2572,15 +2572,24 @@ latch_dominated_by_data_ref (class loop *loop, >> data_reference *dr) >> /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's >> data dependence relations ALIAS_DDRS. */ >> >> -static void >> +void >> compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs, >> vec<dr_with_seg_len_pair_t> *comp_alias_pairs) >> { >> unsigned int i; >> unsigned HOST_WIDE_INT factor = 1; >> - tree niters_plus_one, niters = number_of_latch_executions (loop); >> + tree niters_plus_one, niters; >> >> + if (loop->num == 0) >> + { >> + /* Loop 0 is not a real loop and hence it has no niter information. >> + It executes once. */ >> + niters = build_int_cst (integer_type_node, 1); >> + } >> + else >> + niters = number_of_latch_executions (loop); >> gcc_assert (niters != NULL_TREE && niters != chrec_dont_know); >> + >> niters = fold_convert (sizetype, niters); >> niters_plus_one = size_binop (PLUS_EXPR, niters, size_one_node); >> >> @@ -2595,12 +2604,12 @@ compute_alias_check_pairs (class loop *loop, >> vec<ddr_p> *alias_ddrs, >> struct data_reference *dr_b = DDR_B (ddr); >> tree seg_length_a, seg_length_b; >> >> - if (latch_dominated_by_data_ref (loop, dr_a)) >> + if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_a)) >> seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); >> else >> seg_length_a = data_ref_segment_size (dr_a, niters); >> >> - if (latch_dominated_by_data_ref (loop, dr_b)) >> + if (loop->num != 0 && latch_dominated_by_data_ref (loop, dr_b)) >> seg_length_b = data_ref_segment_size (dr_b, niters_plus_one); >> else >> seg_length_b = data_ref_segment_size (dr_b, niters); >> diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c >> index 
edab778277b..466aa65d899 100644 >> --- a/gcc/tree-scalar-evolution.c >> +++ b/gcc/tree-scalar-evolution.c >> @@ -264,6 +264,8 @@ along with GCC; see the file COPYING3. If not see >> #include "gimple.h" >> #include "ssa.h" >> #include "gimple-pretty-print.h" >> +#include "tree-pretty-print.h" >> +#include "print-tree.h" >> #include "fold-const.h" >> #include "gimplify.h" >> #include "gimple-iterator.h" >> @@ -276,6 +278,7 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-ssa.h" >> #include "cfgloop.h" >> #include "tree-chrec.h" >> +#include "tree-chrec-oacc.h" >> #include "tree-affine.h" >> #include "tree-scalar-evolution.h" >> #include "dumpfile.h" >> @@ -284,6 +287,8 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-into-ssa.h" >> #include "builtins.h" >> #include "case-cfn-macros.h" >> +#include "omp-offload.h" >> +#include "internal-fn.h" >> >> static tree analyze_scalar_evolution_1 (class loop *, tree); >> static tree analyze_scalar_evolution_for_address_of (class loop *loop, >> @@ -550,11 +555,32 @@ get_scalar_evolution (basic_block instantiated_below, >> tree scalar) >> switch (TREE_CODE (scalar)) >> { >> case SSA_NAME: >> + { >> if (SSA_NAME_IS_DEFAULT_DEF (scalar)) >> res = scalar; >> else >> - res = *find_var_scev_info (instantiated_below, scalar); >> + { >> + // TODO-kernels Should no longer be necessary, cf. 
>> oacc_set_arg_evolutions >> + res = *find_var_scev_info (instantiated_below, scalar); >> + if (res) >> + break; >> + >> + tree name = SSA_NAME_IDENTIFIER (scalar); >> + >> + if (name) >> + { >> + const char* id = IDENTIFIER_POINTER (name); >> + if (strncmp (id, ".bound", 6) == 0 >> + || strncmp (id, ".offset", 7) == 0 >> + || strncmp (id, ".chunk_max", 11) == 0 >> + || strncmp (id, ".chunk_no", 10) == 0 >> + || strncmp (id, ".step", 5) == 0) >> + res = scalar; >> + } >> + >> + } >> break; >> + } >> >> case REAL_CST: >> case FIXED_CST: >> @@ -1115,6 +1141,7 @@ follow_ssa_edge_inner_loop_phi (class loop *outer_loop, >> return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi, >> evolution_of_loop, limit); >> } >> +tree interpret_gimple_call (class loop *loop, gimple *call); >> >> /* Follow the ssa edge into the expression EXPR. >> Return true if the strongly connected component has been found. */ >> @@ -1125,7 +1152,9 @@ follow_ssa_edge_expr (class loop *loop, gimple >> *at_stmt, tree expr, >> int limit) >> { >> enum tree_code code; >> - tree type, rhs0, rhs1 = NULL_TREE; >> + tree type = NULL_TREE; >> + tree rhs0 = NULL_TREE; >> + tree rhs1 = NULL_TREE; >> >> /* The EXPR is one of the following cases: >> - an SSA_NAME, >> @@ -1189,26 +1218,36 @@ tail_recurse: >> >> /* At this level of abstraction, the program is just a set >> of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no >> - other def to be handled. */ >> - if (!is_gimple_assign (def)) >> - return t_false; >> - >> - code = gimple_assign_rhs_code (def); >> - switch (get_gimple_rhs_class (code)) >> + other def to be handled except for OpenACC internal function calls. 
>> + */ >> + if (is_gimple_assign (def)) { >> + code = gimple_assign_rhs_code (def); >> + switch (get_gimple_rhs_class (code)) >> + { >> + case GIMPLE_BINARY_RHS: >> + rhs0 = gimple_assign_rhs1 (def); >> + rhs1 = gimple_assign_rhs2 (def); >> + break; >> + case GIMPLE_UNARY_RHS: >> + case GIMPLE_SINGLE_RHS: >> + rhs0 = gimple_assign_rhs1 (def); >> + break; >> + default: >> + return t_false; >> + } >> + type = TREE_TYPE (gimple_assign_lhs (def)); >> + at_stmt = def; >> + } >> + else if (is_oacc_ifn_call_def (expr)) { >> + rhs0 = interpret_gimple_call (loop, def); >> + type = TREE_TYPE (gimple_call_lhs (def)); >> + at_stmt = def; >> + } >> + else >> { >> - case GIMPLE_BINARY_RHS: >> - rhs0 = gimple_assign_rhs1 (def); >> - rhs1 = gimple_assign_rhs2 (def); >> - break; >> - case GIMPLE_UNARY_RHS: >> - case GIMPLE_SINGLE_RHS: >> - rhs0 = gimple_assign_rhs1 (def); >> - break; >> - default: >> return t_false; >> } >> - type = TREE_TYPE (gimple_assign_lhs (def)); >> - at_stmt = def; >> + >> } >> else >> { >> @@ -1920,7 +1959,75 @@ interpret_gimple_assign (class loop *loop, gimple >> *stmt) >> gimple_assign_rhs2 (stmt)); >> } >> >> - >> +/* Extract loop information from a OpenACC internal function call. 
*/ >> +tree >> +oacc_ifn_call_extract (gimple *call) { >> + gcc_assert (gimple_call_internal_p (call, IFN_GOACC_LOOP)); >> + >> + enum ifn_goacc_loop_kind code >> + = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, >> 0)); >> + >> + tree expr; >> + switch (code) >> + { >> + case IFN_GOACC_LOOP_STEP: >> + { >> + expr = gimple_call_arg (call, 3); >> + break; >> + } >> + case IFN_GOACC_LOOP_CHUNKS: >> + { >> + expr = gimple_call_arg (call, 4); >> + break; >> + } >> + case IFN_GOACC_LOOP_OFFSET: >> + { >> + expr = gimple_call_arg (call, 6); >> + break; >> + } >> + case IFN_GOACC_LOOP_BOUND: >> + { >> + expr = gimple_call_arg (call, 2); >> + break; >> + } >> + default: >> + gcc_unreachable(); >> + } >> + >> + gcc_assert (scev_is_linear_expression (expr)); >> + return expr; >> +} >> + >> +/* Interpret a gimple call statement. */ >> +tree >> +interpret_gimple_call (class loop *loop, gimple *call) >> +{ >> + /* Only IFN_GOACC_LOOP calls are handled here. >> + SCEV computation for those calls is only really relevant >> + for Graphite's execution on OpenACC functions in the host >> + compiler. */ >> + >> +#ifndef ACCEL_COMPILER >> + if (!gimple_call_internal_p (call, IFN_GOACC_LOOP)) >> + return chrec_dont_know; >> +#else >> + return chrec_dont_know; >> +#endif >> + >> + /* Information about OpenACC loops is encoded in internal function calls. >> + Extract loop information from those calls, but ignore other calls. 
*/ >> + if (!gimple_call_internal_p (call, IFN_GOACC_LOOP)) >> + return chrec_dont_know; >> + >> + tree expr = oacc_ifn_call_extract (call); >> + tree analyzed = analyze_scalar_evolution (loop, expr); >> + gcc_checking_assert (expr == analyzed); >> + >> + tree lhs = gimple_call_lhs (call); >> + gcc_assert (lhs); >> + >> + return chrec_convert (TREE_TYPE (lhs), analyzed, call); >> +} >> >> /* This section contains all the entry points: >> - number_of_iterations_in_loop, >> @@ -1969,6 +2076,10 @@ analyze_scalar_evolution_1 (class loop *loop, tree >> var) >> res = interpret_gimple_assign (loop, def); >> break; >> >> + case GIMPLE_CALL: >> + res = interpret_gimple_call (loop, def); >> + break; >> + >> case GIMPLE_PHI: >> if (loop_phi_node_p (def)) >> res = interpret_loop_phi (loop, as_a <gphi *> (def)); >> @@ -2049,6 +2160,91 @@ analyze_scalar_evolution (class loop *loop, tree var) >> return res; >> } >> >> +/* Check if VAR represents a parameter of an OpenACC region in an >> + offloaded function. 
That is, check that VAR's defining statement >> + has the shape: >> + >> + VAR2 = *.omp_data_i(D).field >> + VAR = *_VAR2 >> + */ >> +static bool >> +is_oacc_arg (tree var) >> +{ >> + gimple* def = SSA_NAME_DEF_STMT (var); >> + >> + if (!def || !is_gimple_assign (def)) >> + return false; >> + >> + tree rhs = gimple_assign_rhs1 (def); >> + >> + if (TREE_CODE (rhs) != MEM_REF) >> + return false; >> + >> + tree ref = TREE_OPERAND (rhs, 0); >> + >> + if (TREE_CODE (ref) != SSA_NAME) >> + return false; >> + >> + gimple* ref_def = SSA_NAME_DEF_STMT (ref); >> + >> + if (!ref_def || !is_gimple_assign (ref_def)) >> + return false; >> + >> + rhs = gimple_assign_rhs1 (ref_def); >> + if (TREE_CODE (rhs) != COMPONENT_REF) >> + return false; >> + >> + tree base_ref = TREE_OPERAND (rhs, 0); >> + >> + if (TREE_CODE (base_ref) != MEM_REF) >> + return false; >> + >> + tree base = TREE_OPERAND (base_ref, 0); >> + >> + if (!SSA_NAME_IDENTIFIER (base)) >> + return false; >> + >> + char* base_id = >> + const_cast<char*>(IDENTIFIER_POINTER (SSA_NAME_IDENTIFIER (base))); >> + >> + if (strncmp (base_id, ".omp_data_i", 11)) >> + return false; >> + >> + return true; >> +} >> + >> +/* Search for SSA_NAMEs which represent parameters of an offloaded >> + OpenACC region and set their SCEV values to a parametric chrec >> + containing the variable itself. >> + >> + We do not have a way to perform scalar evolution on the function >> + from which an OpenACC outlined function was extracted while >> + executing on the outlined function. Analysing those SSA_NAMEs >> + would lead to chrec_dont_know because of the pointer indirection >> + introduced by the outlining. We are better off treating the >> + names as parameters. */ >> + >> +/* TODO Come up with a way to determine the scalar evolution >> + in the original function */ >> + >> +void >> +oacc_set_arg_evolutions () { >> + unsigned i; >> + tree var; >> + >> + FOR_EACH_SSA_NAME (i, var, cfun) >> + { >> + if (!
is_oacc_arg (var)) >> + continue; >> + >> + basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var)); >> + >> + tree *chrec = find_var_scev_info (bb, var); >> + *chrec = var; >> + } >> + >> +} >> + >> /* Analyzes and returns the scalar evolution of VAR address in LOOP. */ >> >> static tree >> @@ -2261,6 +2457,15 @@ instantiate_scev_name (edge instantiate_below, >> class loop *def_loop; >> basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec)); >> >> + /* Do not instantiate names which dereference ".omp_data_i" field. >> + Cf. oacc_set_arg_evolutions. */ >> + if (is_oacc_arg (chrec)) >> + return chrec; >> + >> + if (is_oacc_loop_ifn_call_def (chrec)) >> + return interpret_gimple_call (evolution_loop, SSA_NAME_DEF_STMT >> (chrec)); >> + >> + >> /* A parameter, nothing to do. */ >> if (!def_bb >> || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest)) >> @@ -3221,10 +3426,14 @@ simple_iv_with_niters (class loop *wrto_loop, class >> loop *use_loop, >> || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num)) >> return false; >> >> + tree ev_type = TREE_TYPE (ev); >> + if (is_oacc_loop_ifn_call_def (ev)) >> + type = integer_type_node; >> + >> if (tree_does_not_contain_chrecs (ev)) >> { >> iv->base = ev; >> - iv->step = build_int_cst (TREE_TYPE (ev), 0); >> + iv->step = build_int_cst (ev_type, 0); >> iv->no_overflow = true; >> return true; >> } >> @@ -3240,6 +3449,9 @@ simple_iv_with_niters (class loop *wrto_loop, class >> loop *use_loop, >> return false; >> >> iv->step = CHREC_RIGHT (ev); >> + if (is_oacc_loop_ifn_call_def (iv->step)) >> + iv->step = interpret_gimple_call (use_loop, SSA_NAME_DEF_STMT >> (iv->step)); >> + >> if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST) >> || tree_contains_chrecs (iv->step, NULL)) >> return false; >> @@ -3385,6 +3597,9 @@ expression_expensive_p (tree expr, hash_map<tree, >> uint64_t> &cache, >> return true; >> } >> >> + if (is_oacc_ifn_call_def (expr)) >> + return false; >> + >> bool visited_p; 
>> uint64_t &local_cost = cache.get_or_insert (expr, &visited_p); >> if (visited_p) >> diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c >> index 6c1268e84ad..0f3d7ce3e76 100644 >> --- a/gcc/tree-ssa-loop-ivcanon.c >> +++ b/gcc/tree-ssa-loop-ivcanon.c >> @@ -64,6 +64,7 @@ along with GCC; see the file COPYING3. If not see >> #include "builtins.h" >> #include "tree-ssa-sccvn.h" >> #include "dbgcnt.h" >> +#include "omp-general.h" >> >> /* Specifies types of loops that may be unrolled. */ >> >> @@ -1256,7 +1257,13 @@ canonicalize_loop_induction_variables (class loop >> *loop, >> populates the loop bounds. */ >> modified |= remove_redundant_iv_tests (loop); >> >> - if (try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, >> + /* Skip unrolling on OpenACC outlined functions. Those functions >> + contain loops (e.g. the top loop for a region) that never iterate >> + and that should not be removed. */ >> + >> + bool skip_unrolling = oacc_get_fn_attrib (cfun->decl); >> + if (!skip_unrolling && >> + try_unroll_loop_completely (loop, exit, niter, may_be_zero, ul, >> maxiter, locus, allow_peel)) >> return true; >> >> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c >> index 7d61ef080eb..c54a0277670 100644 >> --- a/gcc/tree-ssa-loop-niter.c >> +++ b/gcc/tree-ssa-loop-niter.c >> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3. If not see >> #include "tree-ssa-loop.h" >> #include "cfgloop.h" >> #include "tree-chrec.h" >> +#include "tree-chrec-oacc.h" >> #include "tree-scalar-evolution.h" >> #include "tree-dfa.h" >> >> @@ -1980,6 +1981,9 @@ simplify_replace_tree (tree expr, tree old, tree >> new_tree, >> return (ret ? (do_fold ? fold (ret) : ret) : expr); >> } >> >> +tree oacc_ifn_call_extract (gimple*); >> +tree interpret_gimple_call (class loop *loop, gimple *call); >> + >> /* Expand definitions of ssa names in EXPR as long as they are simple >> enough, and return the new expression. 
If STOP is specified, stop >> expanding if EXPR equals to it. */ >> @@ -1995,6 +1999,12 @@ expand_simple_operations (tree expr, tree stop, >> hash_map<tree, tree> &cache) >> if (expr == NULL_TREE) >> return expr; >> >> + if (is_oacc_ifn_call_def (expr)) >> + { >> + //expr = oacc_ifn_call_extract (SSA_NAME_DEF_STMT (expr)); >> + expr = interpret_gimple_call (NULL, SSA_NAME_DEF_STMT (expr)); >> + } >> + >> if (is_gimple_min_invariant (expr)) >> return expr; >> >> @@ -2465,6 +2475,9 @@ number_of_iterations_exit_assumptions (class loop >> *loop, edge exit, >> if (iv0_niters && iv1_niters) >> return false; >> >> + type = TREE_TYPE (iv0.step); >> + >> + >> /* We don't want to see undefined signed overflow warnings while >> computing the number of iterations. */ >> fold_defer_overflow_warnings (); >> -- >> 2.17.1 >> >> ----------------- >> Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany >> Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, >> Alexander Walter ----------------- Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter