I've committed this patch to:
a) Fix the breakage I caused this morning, sorry.

b) Add a ptx-specific implementation of the fork_join hook. We only need to keep worker & vector-level forks & joins, and then only when the dimension size is not unity. This showed an awkwardness in that hook's API, so I tweaked it to pass the dimension array that oacc-xform has handy.

c) Add smarts to VRP to know the range of the result of the DIM_POS and DIM_SIZE functions. This allows the new min/max optimizer to remove some MIN_EXPRs that get inserted.

nathan
2015-08-12  Nathan Sidwell  <nat...@codesourcery.com>

	* target.def (fork_join): Adjust prototype.
	* targhooks.h (default_goacc_fork_join): Adjust.
	* omp-low.c (oacc_xform_dim): Constify DIMS arg.
	(execute_oacc_transform): Defer setting defaulted unknown
	dimensions to 0. Adjust fork_join hook call.
	(default_goacc_fork_join): Adjust.
	* config/nvptx/nvptx.c: Include gimple.h.
	(nvptx_xform_fork_join): New.
	(TARGET_GOACC_FORK_JOIN): Override.
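	* doc/tm.texi: Rebuilt.
	* tree-vrp.c: Include omp-low.h and target.h.
	(extract_range_basic): Handle IFN_GOACC_DIM_SIZE and
	IFN_GOACC_DIM_POS.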

Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 226813)
+++ config/nvptx/nvptx.c	(working copy)
@@ -61,6 +61,7 @@
 #include "cfg.h"
 #include "omp-low.h"
 #include "gomp-constants.h"
+#include "gimple.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -3598,6 +3599,25 @@ nvptx_dim_limit (unsigned axis)
   return 0;
 }
 
+/* Determine whether fork & joins are needed.  */
+
+static bool
+nvptx_xform_fork_join (gimple_stmt_iterator *ARG_UNUSED (gsi), gimple stmt,
+		       const int dims[], bool ARG_UNUSED (is_fork))
+{
+  tree arg = gimple_call_arg (stmt, 0);
+  unsigned axis = TREE_INT_CST_LOW (arg);
+
+  /* We only care about worker and vector partitioning.  */
+  if (axis < GOMP_DIM_WORKER)
+    return true;
+
+  /* If the size is 1, there's no partitioning.  */
+  if (dims[axis] == 1)
+    return true;
+
+  return false;
+}
 
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3699,6 +3719,9 @@ nvptx_dim_limit (unsigned axis)
 #undef TARGET_GOACC_DIM_LIMIT
 #define TARGET_GOACC_DIM_LIMIT nvptx_dim_limit
 
+#undef TARGET_GOACC_FORK_JOIN
+#define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 226813)
+++ doc/tm.texi	(working copy)
@@ -5753,7 +5753,7 @@ This hook should return the maximum size
 or zero if unbounded.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (bool, gimple_stmt_iterator *@var{}, @var{gimple})
+@deftypefn {Target Hook} bool TARGET_GOACC_FORK_JOIN (gimple_stmt_iterator *@var{}, @var{gimple}, const @var{int[]}, @var{bool})
 This hook should convert IFN_GOACC_FORK and IFN_GOACC_JOIN function
 calls to target-specific gimple.  It is executed during the oacc_xform
 pass.  It should return true, if the functions should be deleted.  The
Index: omp-low.c
===================================================================
--- omp-low.c	(revision 226813)
+++ omp-low.c	(working copy)
@@ -14589,7 +14589,7 @@ oacc_xform_on_device (gimple_stmt_iterat
 
 static void
 oacc_xform_dim (gimple_stmt_iterator *gsi, gimple stmt,
-		int dims[], bool is_pos)
+		const int dims[], bool is_pos)
 {
   tree arg = gimple_call_arg (stmt, 0);
   unsigned axis = (unsigned)TREE_INT_CST_LOW (arg);
@@ -14636,12 +14636,12 @@ execute_oacc_transform ()
     for (ix = 0; ix != GOMP_DIM_MAX; ix++)
       {
 	if (!pos)
-	  dims[ix] = 0;
+	  dims[ix] = -1;
 	else
 	  {
 	    tree val = TREE_VALUE (pos);
 	    
-	    dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
+	    dims[ix] = val ? TREE_INT_CST_LOW (val) : -2;
 	    pos = TREE_CHAIN (pos);
 	  }
       }
@@ -14652,7 +14652,7 @@ execute_oacc_transform ()
     for (ix = 0; ix != GOMP_DIM_MAX; ix++)
       if (dims[ix] < 0)
 	{
-	  dims[ix] = 1;
+	  dims[ix] = (int)(dims[ix] < -1);
 	  changed = true;
 	}
   
@@ -14699,7 +14699,7 @@ execute_oacc_transform ()
 		    case IFN_GOACC_FORK:
 		    case IFN_GOACC_JOIN:
 		      if (targetm.goacc.fork_join
-			  (ifn_code == IFN_GOACC_FORK, &gsi, stmt))
+			  (&gsi, stmt, dims, ifn_code == IFN_GOACC_FORK))
 			{
 			  replace_uses_by (gimple_vdef (stmt),
 					   gimple_vuse (stmt));
@@ -14754,24 +14754,25 @@ default_goacc_dim_limit (unsigned ARG_UN
    there is no RTL expander.  */
 
 bool
-default_goacc_fork_join (bool is_fork, gimple_stmt_iterator *ARG_UNUSED (gsi),
-			 gimple ARG_UNUSED (stmt))
+default_goacc_fork_join (gimple_stmt_iterator *ARG_UNUSED (gsi),
+			 gimple ARG_UNUSED (stmt),
+			 const int *ARG_UNUSED (dims), bool is_fork)
 {
+  /* If there is no expander, we can delete the functions.  */
   if (is_fork)
     {
-#ifdef HAVE_oacc_fork
-      return false;
+#ifndef HAVE_oacc_fork
+      return true;
 #endif
     }
   else
     {
-#ifdef HAVE_oacc_join
-      return false;
+#ifndef HAVE_oacc_join
+      return true;
 #endif
     }
 
-  /* We have no expander, so delete the functions now.  */
-  return true;
+  return false;
 }
 
 namespace {
Index: target.def
===================================================================
--- target.def	(revision 226813)
+++ target.def	(working copy)
@@ -1667,7 +1667,7 @@ DEFHOOK
 calls to target-specific gimple.  It is executed during the oacc_xform\n\
 pass.  It should return true, if the functions should be deleted.  The\n\
 default hook returns true, if there is no RTL expanders for them.",
-bool, (bool, gimple_stmt_iterator *, gimple),
+bool, (gimple_stmt_iterator *, gimple, const int[], bool),
 default_goacc_fork_join)
 
 HOOK_VECTOR_END (goacc)
Index: targhooks.h
===================================================================
--- targhooks.h	(revision 226813)
+++ targhooks.h	(working copy)
@@ -109,7 +109,8 @@ extern void default_destroy_cost_data (v
 
 extern bool default_goacc_validate_dims (tree, int []);
 extern unsigned default_goacc_dim_limit (unsigned);
-extern bool default_goacc_fork_join (bool, gimple_stmt_iterator *, gimple);
+extern bool default_goacc_fork_join (gimple_stmt_iterator *, gimple,
+				     const int [], bool);
 
 /* These are here, and not in hooks.[ch], because not all users of
    hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS.  */
Index: tree-vrp.c
===================================================================
--- tree-vrp.c	(revision 226813)
+++ tree-vrp.c	(working copy)
@@ -66,8 +66,8 @@ along with GCC; see the file COPYING3.
 #include "optabs.h"
 #include "tree-ssa-scopedtables.h"
 #include "tree-ssa-threadedge.h"
-
-
+#include "omp-low.h"
+#include "target.h"
 
 /* Range of values that can be associated with an SSA_NAME after VRP
    has executed.  */
@@ -4126,7 +4126,9 @@ extract_range_basic (value_range_t *vr,
   else if (is_gimple_call (stmt) && gimple_call_internal_p (stmt))
     {
       enum tree_code subcode = ERROR_MARK;
-      switch (gimple_call_internal_fn (stmt))
+      unsigned ifn_code = gimple_call_internal_fn (stmt);
+      
+      switch (ifn_code)
 	{
 	case IFN_UBSAN_CHECK_ADD:
 	  subcode = PLUS_EXPR;
@@ -4137,6 +4139,33 @@ extract_range_basic (value_range_t *vr,
 	case IFN_UBSAN_CHECK_MUL:
 	  subcode = MULT_EXPR;
 	  break;
+	case IFN_GOACC_DIM_SIZE:
+	case IFN_GOACC_DIM_POS:
+	  /* Optimizing these two internal functions helps the loop
+	     optimizer eliminate outer comparisons.  Size is [1,N]
+	     and pos is [0,N-1].  */
+	  {
+	    bool is_pos = ifn_code == IFN_GOACC_DIM_POS;
+	    tree attr = get_oacc_fn_attrib (current_function_decl);
+	    tree arg = gimple_call_arg (stmt, 0);
+	    unsigned axis = (unsigned)TREE_INT_CST_LOW (arg);
+	    tree dims = TREE_VALUE (attr);
+
+	    for (unsigned ix = axis; ix--;)
+	      dims = TREE_CHAIN (dims);
+	    int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
+
+	    if (!size)
+	      size = targetm.goacc.dim_limit (axis);
+	    if (size)
+	      set_value_range (vr, VR_RANGE,
+			       build_int_cst (integer_type_node, !is_pos),
+			       build_int_cst (integer_type_node,
+					      size - is_pos), NULL);
+	    return;
+	  }
+	  break;
+	  
 	default:
 	  break;
 	}

Reply via email to