On 11/04/2015 02:24 AM, Jakub Jelinek wrote: > Have you verified pt.c does the right thing when instantiating the > OMP_CLAUSE_TILE clause (I mean primarily the TREE_VEC in there)? > There really should be testcases for that.
Here's a patch which adds template support for the OpenACC clauses and constructs, along with new testcases. Is it OK for trunk? Cesar
2015-11-04 Cesar Philippidis <ce...@codesourcery.com> gcc/cp/ * pt.c (tsubst_omp_clauses): Add support for OMP_CLAUSE_{NUM_GANGS, NUM_WORKERS,VECTOR_LENGTH,GANG,WORKER,VECTOR,ASYNC,WAIT,TILE,AUTO, INDEPENDENT,SEQ}. (tsubst_expr): Add support for OACC_{KERNELS,PARALLEL,LOOP,DATA, ENTER_DATA,EXIT_DATA,UPDATE}. gcc/testsuite/ * g++.dg/goacc/template-reduction.C: New test. * g++.dg/goacc/template.C: New test. diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c index e3f55a7..4424596 100644 --- a/gcc/cp/pt.c +++ b/gcc/cp/pt.c @@ -14395,6 +14395,15 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_PRIORITY: case OMP_CLAUSE_ORDERED: case OMP_CLAUSE_HINT: + case OMP_CLAUSE_NUM_GANGS: + case OMP_CLAUSE_NUM_WORKERS: + case OMP_CLAUSE_VECTOR_LENGTH: + case OMP_CLAUSE_GANG: + case OMP_CLAUSE_WORKER: + case OMP_CLAUSE_VECTOR: + case OMP_CLAUSE_ASYNC: + case OMP_CLAUSE_WAIT: + case OMP_CLAUSE_TILE: OMP_CLAUSE_OPERAND (nc, 0) = tsubst_expr (OMP_CLAUSE_OPERAND (oc, 0), args, complain, in_decl, /*integral_constant_expression_p=*/false); @@ -14449,6 +14458,9 @@ tsubst_omp_clauses (tree clauses, bool declare_simd, bool allow_fields, case OMP_CLAUSE_THREADS: case OMP_CLAUSE_SIMD: case OMP_CLAUSE_DEFAULTMAP: + case OMP_CLAUSE_INDEPENDENT: + case OMP_CLAUSE_AUTO: + case OMP_CLAUSE_SEQ: break; default: gcc_unreachable (); @@ -15197,6 +15209,15 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, } break; + case OACC_KERNELS: + case OACC_PARALLEL: + tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, false, args, complain, + in_decl); + stmt = begin_omp_parallel (); + RECUR (OMP_BODY (t)); + finish_omp_construct (TREE_CODE (t), stmt, tmp); + break; + case OMP_PARALLEL: r = push_omp_privatization_clauses (OMP_PARALLEL_COMBINED (t)); tmp = tsubst_omp_clauses (OMP_PARALLEL_CLAUSES (t), false, true, @@ -15227,6 +15248,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, case CILK_FOR: case OMP_DISTRIBUTE: case OMP_TASKLOOP: + case OACC_LOOP: { tree 
clauses, body, pre_body; tree declv = NULL_TREE, initv = NULL_TREE, condv = NULL_TREE; @@ -15235,7 +15257,8 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, int i; r = push_omp_privatization_clauses (OMP_FOR_INIT (t) == NULL_TREE); - clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false, true, + clauses = tsubst_omp_clauses (OMP_FOR_CLAUSES (t), false, + TREE_CODE (t) != OACC_LOOP, args, complain, in_decl); if (OMP_FOR_INIT (t) != NULL_TREE) { @@ -15305,9 +15328,11 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, pop_omp_privatization_clauses (r); break; + case OACC_DATA: case OMP_TARGET_DATA: case OMP_TARGET: - tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, true, + tmp = tsubst_omp_clauses (OMP_CLAUSES (t), false, + TREE_CODE (t) != OACC_DATA, args, complain, in_decl); keep_next_level (true); stmt = begin_omp_structured_block (); @@ -15331,6 +15356,16 @@ tsubst_expr (tree t, tree args, tsubst_flags_t complain, tree in_decl, add_stmt (t); break; + case OACC_ENTER_DATA: + case OACC_EXIT_DATA: + case OACC_UPDATE: + tmp = tsubst_omp_clauses (OMP_STANDALONE_CLAUSES (t), false, false, + args, complain, in_decl); + t = copy_node (t); + OMP_STANDALONE_CLAUSES (t) = tmp; + add_stmt (t); + break; + case OMP_ORDERED: tmp = tsubst_omp_clauses (OMP_ORDERED_CLAUSES (t), false, true, args, complain, in_decl); diff --git a/gcc/testsuite/g++.dg/goacc/template-reduction.C b/gcc/testsuite/g++.dg/goacc/template-reduction.C new file mode 100644 index 0000000..668eeb3 --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/template-reduction.C @@ -0,0 +1,104 @@ +// This error is temporary. Remove when support is added for these clauses +// in the middle end. 
+// { dg-prune-output "sorry, unimplemented" } + +extern void abort (); + +const int n = 100; + +// Check explicit template copy map + +template<typename T> T +sum (T array[]) +{ + T s = 0; + +#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s, array[0:n]) + for (int i = 0; i < n; i++) + s += array[i]; + + return s; +} + +// Check implicit template copy map + +template<typename T> T +sum () +{ + T s = 0; + T array[n]; + + for (int i = 0; i < n; i++) + array[i] = i+1; + +#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy (s) + for (int i = 0; i < n; i++) + s += array[i]; + + return s; +} + +// Check present and async + +template<typename T> T +async_sum (T array[]) +{ + T s = 0; + +#pragma acc parallel loop num_gangs (10) gang async (1) present (array[0:n]) + for (int i = 0; i < n; i++) + array[i] = i+1; + +#pragma acc parallel loop num_gangs (10) gang reduction (+:s) present (array[0:n]) copy (s) async wait (1) + for (int i = 0; i < n; i++) + s += array[i]; + +#pragma acc wait + + return s; +} + +// Check present and async and an explicit firstprivate + +template<typename T> T +async_sum (int c) +{ + T s = 0; + +#pragma acc parallel loop num_gangs (10) gang reduction (+:s) copy(s) firstprivate (c) async wait (1) + for (int i = 0; i < n; i++) + s += i+c; + +#pragma acc wait + + return s; +} + +int +main() +{ + int a[n]; + int result = 0; + + for (int i = 0; i < n; i++) + { + a[i] = i+1; + result += i+1; + } + + if (sum (a) != result) + abort (); + + if (sum<int> () != result) + abort (); + +#pragma acc enter data copyin (a) + if (async_sum (a) != result) + abort (); + + if (async_sum<int> (1) != result) + abort (); +#pragma acc exit data delete (a) + + return 0; +} diff --git a/gcc/testsuite/g++.dg/goacc/template.C b/gcc/testsuite/g++.dg/goacc/template.C new file mode 100644 index 0000000..f899d6a --- /dev/null +++ b/gcc/testsuite/g++.dg/goacc/template.C @@ -0,0 +1,140 @@ +// This error is temporary. 
Remove when support is added for these clauses +// in the middle end. +// { dg-prune-output "sorry, unimplemented" } + +#pragma acc routine +template <typename T> T +accDouble(int val) +{ + return val * 2; +} + +template<typename T> T +oacc_parallel_copy (T a) +{ + T b = 0; + char w = 1; + int x = 2; + float y = 3; + double z = 4; + +#pragma acc parallel num_gangs (a) num_workers (a) vector_length (a) default (none) copyout (b) copyin (a) + { + b = a; + } + +#pragma acc parallel num_gangs (a) copy (w, x, y, z) + { + w = accDouble<char>(w); + x = accDouble<int>(x); + y = accDouble<float>(y); + z = accDouble<double>(z); + } + +#pragma acc parallel num_gangs (a) if (1) + { +#pragma acc loop auto tile (a, 3) + for (int i = 0; i < a; i++) + for (int j = 0; j < 5; j++) + b = a; + +#pragma acc loop seq + for (int i = 0; i < a; i++) + b = a; + } + + T c; + +#pragma acc parallel num_workers (10) + { +#pragma acc atomic capture + c = b++; + +#pragma atomic update + c++; + +#pragma acc atomic read + b = a; + +#pragma acc atomic write + b = a; + } + +#pragma acc parallel reduction (+:c) + { + c = 1; + } + +#pragma acc data if (1) copy (b) + { + #pragma acc parallel + { + b = a; + } + } + +#pragma acc enter data copyin (b) +#pragma acc parallel present (b) + { + b = a; + } + +#pragma acc update host (b) +#pragma acc update self (b) +#pragma acc update device (b) +#pragma acc exit data delete (b) + + return b; +} + +template<typename T> T +oacc_kernels_copy (T a) +{ + T b = 0; + T c = 0; + char w = 1; + int x = 2; + float y = 3; + double z = 4; + +#pragma acc kernels copy (w, x, y, z) + { + w = accDouble<char>(w); + x = accDouble<int>(x); + y = accDouble<float>(y); + z = accDouble<double>(z); + } + +#pragma acc kernels copyout (b) copyin (a) + b = a; + +#pragma acc kernels loop reduction (+:c) + for (int i = 0; i < 10; i++) + { + c = 1; + } + +#pragma acc data if (1) copy (b) + { + #pragma acc kernels + { + b = a; + } + } + +#pragma acc enter data copyin (b) +#pragma acc kernels 
present (b) + { + b = a; + } + return b; +} + +int +main () +{ + int b = oacc_parallel_copy<int> (5); + int c = oacc_kernels_copy<int> (5); + + return b + c; +}