Re: [gomp4] Remove some ptxness from middle end

2015-06-22 Thread Marek Polacek
On Mon, Jun 22, 2015 at 01:00:51PM -0400, Nathan Sidwell wrote:
 +  if (GET_CODE (arg) != CONST_INT
 +  || (unsigned HOST_WIDE_INT)INTVAL (arg) >= OACC_HWM)

Don't we have UINTVAL for this?  So UINTVAL (arg).

Marek


Re: [gomp4] Remove some ptxness from middle end

2015-06-22 Thread Nathan Sidwell

On 06/22/15 13:04, Marek Polacek wrote:

On Mon, Jun 22, 2015 at 01:00:51PM -0400, Nathan Sidwell wrote:

+  if (GET_CODE (arg) != CONST_INT
+  || (unsigned HOST_WIDE_INT)INTVAL (arg) >= OACC_HWM)


Don't we have UINTVAL for this?  So UINTVAL (arg).


Oh, thanks! will fix

nathan

--
Nathan Sidwell


[gomp4] Remove some ptxness from middle end

2015-06-22 Thread Nathan Sidwell
I've committed this patch to the gomp4 branch, after testing.  It does a number 
of cleanups:


1) removes the ptx-specific TID, NTID, CTAID & NCTAID builtins, replacing them 
with openacc-specific GOACC_id and GOACC_nid builtins, using gang/worker & 
vector level enumeration.  These are mapped by the PTX backend to PTX-specific 
instructions.


2) Created an oacc_loop_levels enumeration, and generate the loop nest masks 
from that.


3) Removed a bunch of duplicate calculations in omp-low related to determining 
number of threads and thread index. With #2 it becomes easier to use a loop.


nathan
--
Nathan Sidwell
2015-06-20  Nathan Sidwell  nat...@codesourcery.com

	gcc/
	* omp-builtins.def (BUILT_IN_GOACC_NTID, BUILT_IN_GOACC_NCTAID): Replace
	with ...
	(BUILT_IN_GOACC_NID): ... this.
	(BUILT_IN_GOACC_TID, BUILT_IN_GOACC_CTAID): Replace with ...
	(BUILT_IN_GOACC_ID): ... this.
	* builtins.c: Include omp-low.h.
	(expand_oacc_builtin): Replace with ...
	(expand_oacc_id): ... this.
	(expand_builtin, is_simple_builtin): Adjust.
	* omp-low.h (enum oacc_loop_levels): New.
	* omp-low.c (MASK_GANG, MASK_WORKER, MASK_VECTOR): Replace with ...
	(OACC_LOOP_MASK): ... this.
	(scan_omp_for, scan_omp_target): Adjust.
	(expand_oacc_get_num_threads): Adjust and use a loop.
	(expand_oacc_get_thread_num): Likewise.
	(oacc_loop_needs_thread_barrier_p, find_omp_for_region_gwv,
	find_omp_target_region_data, required_predication_mask,
	generate_vector_broadcast, generate_oacc_broadcast): Adjust.
	(make_predication_test): Adjust and use a loop.
	(predicate_bb, oacc_broadcast, oacc_init_count_vars): Adjust.
	* config/nvptx/nvptx.md (UNSPEC_NTID, UNSPEC_TID, UNSPEC_NCTAID,
	UNSPEC_CTAID): Replace with ...
	(UNSPEC_NID, UNSPEC_ID): ... these.
	(*oacc_ntid_insn, oacc_ntid, *oacc_tid_insn, oacc_tid,
	*oacc_nctaid_insn, oacc_nctaid, *oacc_ctaid_insn,
	oacc_ctaid): Replace with ...
	(oacc_nid, oacc_id): ... these.
	* config/nvptx/nvptx.c (nvptx_print_operand [CASE 'd']): Remove.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/gang-static-2.c: Replace
	GOACC_ctaid builtin with GOACC_id.

Index: libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c
===
--- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c	(revision 224671)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c	(working copy)
@@ -35,38 +35,38 @@ main ()
 
 #pragma acc parallel loop gang (static:*) num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_nonstatic (a, 10);
 
 #pragma acc parallel loop gang (static:1) num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_static (a, 10, 1);
 
 #pragma acc parallel loop gang (static:2) num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_static (a, 10, 2);
 
 #pragma acc parallel loop gang (static:5) num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_static (a, 10, 5);
 
 #pragma acc parallel loop gang (static:20) num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_static (a, 10, 20);
 
   /* Non-static gang.  */
 #pragma acc parallel loop gang num_gangs (10)
   for (i = 0; i < 100; i++)
-a[i] = __builtin_GOACC_ctaid (0);
+a[i] = __builtin_GOACC_id (0);
 
   test_nonstatic (a, 10);
 
Index: gcc/omp-builtins.def
===
--- gcc/omp-builtins.def	(revision 224671)
+++ gcc/omp-builtins.def	(working copy)
@@ -61,13 +61,9 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, GOACC_wait,
 		   BT_FN_VOID_INT_INT_VAR,
 		   ATTR_NOTHROW_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NTID, GOACC_ntid,
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ID, GOACC_id,
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_TID, GOACC_tid,
-		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NCTAID, GOACC_nctaid,
-		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
-DEF_GOACC_BUILTIN (BUILT_IN_GOACC_CTAID, GOACC_ctaid,
+DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NID, GOACC_nid,
 		   BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, GOACC_get_ganglocal_ptr,
 		   BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST)
Index: gcc/config/nvptx/nvptx.md
===
--- gcc/config/nvptx/nvptx.md	(revision 224671)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -49,10 +49,8 @@
 
UNSPEC_ALLOCA
 
-   UNSPEC_NTID
-   UNSPEC_TID
-   UNSPEC_NCTAID
-   UNSPEC_CTAID
+   UNSPEC_NID
+   UNSPEC_ID
 
UNSPEC_SHARED_DATA
 ])
@@ -1263,65 +1261,32 @@
   DONE;
 })
 
-(define_insn 

Re: [gomp4] Remove some ptxness from middle end

2015-06-22 Thread Nathan Sidwell

On 06/22/15 13:04, Marek Polacek wrote:

On Mon, Jun 22, 2015 at 01:00:51PM -0400, Nathan Sidwell wrote:

+  if (GET_CODE (arg) != CONST_INT
+  || (unsigned HOST_WIDE_INT)INTVAL (arg) >= OACC_HWM)


Don't we have UINTVAL for this?  So UINTVAL (arg).


Applied the attached, after testing.  Also realized I'd missed some places I 
should have used the new loop level enumeration.


nathan

--
Nathan Sidwell
2015-06-22  Nathan Sidwell  nat...@codesourcery.com

	* omp-low.c (expand_oacc_get_num_threads): Use OACC enum.
	(expand_oacc_get_thread_num, make_predication_test): Likewise.
	* builtins.c (expand_oacc_id): Use UINTVAL.

Index: omp-low.c
===
--- omp-low.c	(revision 224747)
+++ omp-low.c	(working copy)
@@ -4994,8 +4994,8 @@ expand_oacc_get_num_threads (gimple_seq
   tree  decl = builtin_decl_explicit (BUILT_IN_GOACC_NID);
   unsigned ix;
 
-  for (ix = 0; (1 << ix) <= gwv_bits; ix++)
-if ((1 << ix) & gwv_bits)
+  for (ix = OACC_gang; ix != OACC_HWM; ix++)
+if (OACC_LOOP_MASK(ix) & gwv_bits)
   {
 	tree arg = build_int_cst (unsigned_type_node, ix);
 	tree count = create_tmp_var (unsigned_type_node);
@@ -5022,8 +5022,8 @@ expand_oacc_get_thread_num (gimple_seq *
   unsigned ix;
 
   /* Start at gang level, and examine relevant dimension indices.  */
-  for (ix = 0; (1 << ix) <= gwv_bits; ix++)
-if ((1 << ix) & gwv_bits)
+  for (ix = OACC_gang; ix != OACC_HWM; ix++)
+if (OACC_LOOP_MASK (ix) & gwv_bits)
   {
 	tree arg = build_int_cst (unsigned_type_node, ix);
 
@@ -10671,7 +10671,7 @@ make_predication_test (edge true_edge, b
   unsigned ix;
 
   for (ix = OACC_worker; ix <= OACC_vector; ix++)
-if (mask & (1 << ix))
+if (OACC_LOOP_MASK (ix) & mask)
   {
 	gimple call = gimple_build_call
 	  (decl, 1, build_int_cst (unsigned_type_node, ix));
Index: builtins.c
===
--- builtins.c	(revision 224747)
+++ builtins.c	(working copy)
@@ -5971,8 +5971,7 @@ expand_oacc_id (enum built_in_function f
   rtx arg;
 
   arg = expand_normal (arg0);
-  if (GET_CODE (arg) != CONST_INT
-  || (unsigned HOST_WIDE_INT)INTVAL (arg) >= OACC_HWM)
+  if (GET_CODE (arg) != CONST_INT || UINTVAL (arg) >= OACC_HWM)
 {
   error ("argument to %D must be constant in range 0 to %d",
 	 get_callee_fndecl (exp), OACC_HWM - 1);