I've committed this patch to: a) Fix the breakage I caused this morning (sorry).
b) Add a PTX-specific implementation of the fork_join hook. We only need to keep worker- and vector-level forks and joins, and then only when the dimension size is not 1. This exposed an awkwardness in that hook's API, so I tweaked it to pass the dimension array that oacc-xform already has to hand.
c) Teach VRP the range of the results of the DIM_POS and DIM_SIZE functions. This allows the new min/max optimizer to remove some MIN_EXPRs that get inserted.
nathan
2015-08-12 Nathan Sidwell <nat...@codesourcery.com> * target.def (fork_join): Adjust prototype. * targhooks.h (default_goacc_fork_join): Adjust. * omp-low.c (oacc_xform_dim): Constify DIMS arg. (execute_oacc_transform): Defer setting defaulted unknown dimensions to 0. Adjust fork_join hook call. (default_goacc_fork_join): Adjust. * config/nvptx/nvptx.c: Include gimple.h. (nvptx_xform_fork_join): New. (TARGET_GOACC_FORK_JOIN): Override. * doc/tm.texi: Rebuilt. Index: config/nvptx/nvptx.c =================================================================== --- config/nvptx/nvptx.c (revision 226813) +++ config/nvptx/nvptx.c (working copy) @@ -61,6 +61,7 @@ #include "cfg.h" #include "omp-low.h" #include "gomp-constants.h" +#include "gimple.h" /* This file should be included last. */ #include "target-def.h" @@ -3598,6 +3599,25 @@ nvptx_dim_limit (unsigned axis) return 0; } +/* Determine whether fork & joins are needed. */ + +static bool +nvptx_xform_fork_join (gimple_stmt_iterator *ARG_UNUSED (gsi), gimple stmt, + const int dims[], bool ARG_UNUSED (is_fork)) +{ + tree arg = gimple_call_arg (stmt, 0); + unsigned axis = TREE_INT_CST_LOW (arg); + + /* We only care about worker and vector partitioning. */ + if (axis < GOMP_DIM_WORKER) + return true; + + /* If the size is 1, there's no partitioning. */ + if (dims[axis] == 1) + return true; + + return false; +} #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override @@ -3699,6 +3719,9 @@ nvptx_dim_limit (unsigned axis) #undef TARGET_GOACC_DIM_LIMIT #define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit +#undef TARGET_GOACC_FORK_JOIN +#define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h" Index: doc/tm.texi =================================================================== --- doc/tm.texi (revision 226813) +++ doc/tm.texi (working copy) @@ -5753,7 +5753,7 @@ This hook should return the maximum size or zero if unbounded. 
@end deftypefn -@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (bool, gimple_stmt_iterator *@var{}, @var{gimple}) +@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gimple_stmt_iterator *@var{}, @var{gimple}, const @var{int[]}, @var{bool}) This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function calls to target-specific gimple. It is executed during the oacc_xform pass. It should return true, if the functions should be deleted. The Index: omp-low.c =================================================================== --- omp-low.c (revision 226813) +++ omp-low.c (working copy) @@ -14589,7 +14589,7 @@ oacc_xform_on_device (gimple_stmt_iterat static void oacc_xform_dim (gimple_stmt_iterator *gsi, gimple stmt, - int dims[], bool is_pos) + const int dims[], bool is_pos) { tree arg = gimple_call_arg (stmt, 0); unsigned axis = (unsigned)TREE_INT_CST_LOW (arg); @@ -14636,12 +14636,12 @@ execute_oacc_transform () for (ix = 0; ix != GOMP_DIM_MAX; ix++) { if (!pos) - dims[ix] = 0; + dims[ix] = -1; else { tree val = TREE_VALUE (pos); - dims[ix] = val ? TREE_INT_CST_LOW (val) : -1; + dims[ix] = val ? TREE_INT_CST_LOW (val) : -2; pos = TREE_CHAIN (pos); } } @@ -14652,7 +14652,7 @@ execute_oacc_transform () for (ix = 0; ix != GOMP_DIM_MAX; ix++) if (dims[ix] < 0) { - dims[ix] = 1; + dims[ix] = (int)(dims[ix] < -1); changed = true; } @@ -14699,7 +14699,7 @@ execute_oacc_transform () case IFN_GOACC_FORK: case IFN_GOACC_JOIN: if (targetm.goacc.fork_join - (ifn_code == IFN_GOACC_FORK, &gsi, stmt)) + (&gsi, stmt, dims, ifn_code == IFN_GOACC_FORK)) { replace_uses_by (gimple_vdef (stmt), gimple_vuse (stmt)); @@ -14754,24 +14754,25 @@ default_goacc_dim_limit (unsigned ARG_UN there is no RTL expander. 
*/ bool -default_goacc_fork_join (bool is_fork, gimple_stmt_iterator *ARG_UNUSED (gsi), - gimple ARG_UNUSED (stmt)) +default_goacc_fork_join (gimple_stmt_iterator *ARG_UNUSED (gsi), + gimple ARG_UNUSED (stmt), + const int *ARG_UNUSED (dims), bool is_fork) { + /* If there is no expander, we can delete the functions. */ if (is_fork) { -#ifdef HAVE_oacc_fork - return false; +#ifndef HAVE_oacc_fork + return true; #endif } else { -#ifdef HAVE_oacc_join - return false; +#ifndef HAVE_oacc_join + return true; #endif } - /* We have no expander, so delete the functions now. */ - return true; + return false; } namespace { Index: target.def =================================================================== --- target.def (revision 226813) +++ target.def (working copy) @@ -1667,7 +1667,7 @@ DEFHOOK calls to target-specific gimple. It is executed during the oacc_xform\n\ pass. It should return true, if the functions should be deleted. The\n\ default hook returns true, if there is no RTL expanders for them.", -bool, (bool, gimple_stmt_iterator *, gimple), +bool, (gimple_stmt_iterator *, gimple, const int[], bool), default_goacc_fork_join) HOOK_VECTOR_END (goacc) Index: targhooks.h =================================================================== --- targhooks.h (revision 226813) +++ targhooks.h (working copy) @@ -109,7 +109,8 @@ extern void default_destroy_cost_data (v extern bool default_goacc_validate_dims (tree, int []); extern unsigned default_goacc_dim_limit (unsigned); -extern bool default_goacc_fork_join (bool, gimple_stmt_iterator *, gimple); +extern bool default_goacc_fork_join (gimple_stmt_iterator *, gimple, + const int [], bool); /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ Index: tree-vrp.c =================================================================== --- tree-vrp.c (revision 226813) +++ tree-vrp.c (working copy) @@ -66,8 +66,8 @@ along with GCC; see the file COPYING3. 
#include "optabs.h" #include "tree-ssa-scopedtables.h" #include "tree-ssa-threadedge.h" - - +#include "omp-low.h" +#include "target.h" /* Range of values that can be associated with an SSA_NAME after VRP has executed. */ @@ -4126,7 +4126,9 @@ extract_range_basic (value_range_t *vr, else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt)) { enum tree_code subcode = ERROR_MARK; - switch (gimple_call_internal_fn (stmt)) + unsigned ifn_code = gimple_call_internal_fn (stmt); + + switch (ifn_code) { case IFN_UBSAN_CHECK_ADD: subcode = PLUS_EXPR; @@ -4137,6 +4139,33 @@ extract_range_basic (value_range_t *vr, case IFN_UBSAN_CHECK_MUL: subcode = MULT_EXPR; break; + case IFN_GOACC_DIM_SIZE: + case IFN_GOACC_DIM_POS: + /* Optimizing these two internal functions helps the loop + optimizer elimitate outer comparisons. Size is [1,N] + and pos is [0,N-1]. */ + { + bool is_pos = ifn_code == IFN_GOACC_DIM_POS; + tree attr = get_oacc_fn_attrib (current_function_decl); + tree arg = gimple_call_arg (stmt, 0); + unsigned axis = (unsigned)TREE_INT_CST_LOW (arg); + tree dims = TREE_VALUE (attr); + + for (unsigned ix = axis; ix--;) + dims = TREE_CHAIN (dims); + int size = TREE_INT_CST_LOW (TREE_VALUE (dims)); + + if (!size) + size = targetm.goacc.dim_limit (axis); + if (size) + set_value_range (vr, VR_RANGE, + build_int_cst (integer_type_node, !is_pos), + build_int_cst (integer_type_node, + size - is_pos), NULL); + return; + } + break; + default: break; }