Hi! On Sat, 15 Nov 2014 13:14:52 +0100, Tom de Vries <tom_devr...@mentor.com> wrote: > I'm submitting a patch series with initial support for the oacc kernels > directive.
Committed to gomp-4_0-branch in r222288: commit 7109b39defb87bc839983339c9fb4cdcb3891238 Author: tschwinge <tschwinge@138bc75d-0d04-0410-961f-82ee72b054a4> Date: Tue Apr 21 20:32:01 2015 +0000 Handle oacc kernels with other directives Mark directives with fn spec attributes to prevent them from acting as optimization barrier. gcc/ * builtin-attrs.def (DOT_DOT_r_r_r): Add DEF_ATTR_FOR_STRING. (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST): Add DEF_ATTR_TREE_LIST. * omp-builtins.def (BUILT_IN_GOACC_DATA_START) (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_UPDATE): Use DEF_GOACC_BUILTIN_FNSPEC instead of DEF_GOACC_BUILTIN. gcc/testsuite/ * c-c++-common/goacc/kernels-loop-data-2.c: New test. * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test. * c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test. * c-c++-common/goacc/kernels-loop-data-update.c: New test. * c-c++-common/goacc/kernels-loop-data.c: New test. * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New test. * gfortran.dg/goacc/kernels-loop-data-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: New test. * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: New test. * gfortran.dg/goacc/kernels-loop-data-update.f95: New test. * gfortran.dg/goacc/kernels-loop-data.f95: New test. * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: New test. * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95: New test. 
* testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop-data.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95: New test. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@222288 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc/ChangeLog.gomp | 6 ++ gcc/builtin-attrs.def | 3 + gcc/omp-builtins.def | 21 +++--- gcc/testsuite/ChangeLog.gomp | 15 +++++ .../c-c++-common/goacc/kernels-loop-data-2.c | 71 ++++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit-2.c | 69 +++++++++++++++++++ .../goacc/kernels-loop-data-enter-exit.c | 66 ++++++++++++++++++ .../c-c++-common/goacc/kernels-loop-data-update.c | 66 ++++++++++++++++++ .../c-c++-common/goacc/kernels-loop-data.c | 65 ++++++++++++++++++ .../goacc/kernels-parallel-loop-data-enter-exit.c | 67 ++++++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-2.f95 | 52 ++++++++++++++ .../goacc/kernels-loop-data-enter-exit-2.f95 | 52 ++++++++++++++ .../goacc/kernels-loop-data-enter-exit.f95 | 50 ++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data-update.f95 | 49 ++++++++++++++ .../gfortran.dg/goacc/kernels-loop-data.f95 | 50 ++++++++++++++ .../kernels-parallel-loop-data-enter-exit.f95 | 51 ++++++++++++++ libgomp/ChangeLog.gomp | 24 +++++++ .../kernels-loop-data-2.c | 56 +++++++++++++++ .../kernels-loop-data-enter-exit-2.c | 54 +++++++++++++++ .../kernels-loop-data-enter-exit.c | 51 ++++++++++++++ .../kernels-loop-data-update.c | 53 +++++++++++++++ .../libgomp.oacc-c-c++-common/kernels-loop-data.c | 50 ++++++++++++++ .../kernels-parallel-loop-data-enter-exit.c | 52 ++++++++++++++ .../libgomp.oacc-fortran/kernels-loop-data-2.f95 | 38 +++++++++++ .../kernels-loop-data-enter-exit-2.f95 | 38 +++++++++++ .../kernels-loop-data-enter-exit.f95 | 36 ++++++++++ .../kernels-loop-data-update.f95 | 36 ++++++++++ 
.../libgomp.oacc-fortran/kernels-loop-data.f95 | 36 ++++++++++ .../kernels-parallel-loop-data-enter-exit.f95 | 37 ++++++++++ 29 files changed, 1306 insertions(+), 8 deletions(-) diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp index b1933ba..1e12554 100644 --- gcc/ChangeLog.gomp +++ gcc/ChangeLog.gomp @@ -1,5 +1,11 @@ 2015-04-21 Tom de Vries <t...@codesourcery.com> + * builtin-attrs.def (DOT_DOT_r_r_r): Add DEF_ATTR_FOR_STRING. + (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST): Add DEF_ATTR_TREE_LIST. + * omp-builtins.def (BUILT_IN_GOACC_DATA_START) + (BUILT_IN_GOACC_ENTER_EXIT_DATA, BUILT_IN_GOACC_UPDATE): Use + DEF_GOACC_BUILTIN_FNSPEC instead of DEF_GOACC_BUILTIN. + * passes.def: Add pass_fre after pass_ch_oacc_kernels. * passes.def: Add pass_scev_cprop to pass_oacc_kernels. diff --git gcc/builtin-attrs.def gcc/builtin-attrs.def index 8eca053..2897c19 100644 --- gcc/builtin-attrs.def +++ gcc/builtin-attrs.def @@ -65,6 +65,7 @@ DEF_ATTR_FOR_INT (6) ATTR_##ENUM, ATTR_NULL) DEF_ATTR_FOR_STRING (STR1, "1") DEF_ATTR_FOR_STRING (DOT_DOT_DOT_r_r_r, "...rrr") +DEF_ATTR_FOR_STRING (DOT_DOT_r_r_r, "..rrr") #undef DEF_ATTR_FOR_STRING /* Construct a tree for a list of two integers. */ @@ -131,6 +132,8 @@ DEF_ATTR_TREE_LIST (ATTR_PURE_NOTHROW_LEAF_LIST, ATTR_PURE, \ DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST, \ ATTR_FNSPEC, ATTR_LIST_DOT_DOT_DOT_r_r_r, \ ATTR_NOTHROW_LIST) +DEF_ATTR_TREE_LIST (ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST, \ + ATTR_FNSPEC, ATTR_LIST_DOT_DOT_r_r_r, ATTR_NOTHROW_LIST) DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LIST, ATTR_NORETURN, \ ATTR_NULL, ATTR_NOTHROW_LIST) DEF_ATTR_TREE_LIST (ATTR_NORETURN_NOTHROW_LEAF_LIST, ATTR_NORETURN,\ diff --git gcc/omp-builtins.def gcc/omp-builtins.def index cd273f2..ba64976 100644 --- gcc/omp-builtins.def +++ gcc/omp-builtins.def @@ -32,13 +32,17 @@ along with GCC; see the file COPYING3. 
If not see DEF_GOACC_BUILTIN (BUILT_IN_ACC_GET_DEVICE_TYPE, "acc_get_device_type", BT_FN_INT, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_START, "GOACC_data_start", - BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_DATA_START, "GOACC_data_start", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR, + ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST, + ATTR_NOTHROW_LIST, "..rrr") DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DATA_END, "GOACC_data_end", BT_FN_VOID, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ENTER_EXIT_DATA, "GOACC_enter_exit_data", - BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, - ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_ENTER_EXIT_DATA, + "GOACC_enter_exit_data", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, + ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST, + ATTR_NOTHROW_LIST, "..rrr") DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_KERNELS_INTERNAL, "GOACC_kernels_internal", BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, @@ -50,9 +54,10 @@ DEF_GOACC_BUILTIN (BUILT_IN_GOACC_KERNELS, "GOACC_kernels", DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel", BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_INT_INT_INT_INT_INT_VAR, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_UPDATE, "GOACC_update", - BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, - ATTR_NOTHROW_LIST) +DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_UPDATE, "GOACC_update", + BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR, + ATTR_FNSPEC_DOT_DOT_r_r_r_NOTHROW_LIST, + ATTR_NOTHROW_LIST, "..rrr") DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp index ed80f5b..4c2928b 100644 --- gcc/testsuite/ChangeLog.gomp +++ gcc/testsuite/ChangeLog.gomp @@ -1,6 +1,21 @@ 2015-04-21 Tom de Vries <t...@codesourcery.com> Thomas Schwinge <tho...@codesourcery.com> + * c-c++-common/goacc/kernels-loop-data-2.c: New test. 
+ * c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: New test. + * c-c++-common/goacc/kernels-loop-data-enter-exit.c: New test. + * c-c++-common/goacc/kernels-loop-data-update.c: New test. + * c-c++-common/goacc/kernels-loop-data.c: New test. + * c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: New + test. + * gfortran.dg/goacc/kernels-loop-data-2.f95: New test. + * gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95: New test. + * gfortran.dg/goacc/kernels-loop-data-enter-exit.f95: New test. + * gfortran.dg/goacc/kernels-loop-data-update.f95: New test. + * gfortran.dg/goacc/kernels-loop-data.f95: New test. + * gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95: New + test. + * c-c++-common/goacc/kernels-counter-vars-function-scope.c: New test. * c-c++-common/goacc/kernels-one-counter-var.c: New test. * g++.dg/ipa/devirt-37.C: Update for new pass_fre. diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c new file mode 100644 index 0000000..fc6da6e --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c @@ -0,0 +1,71 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + } + +#pragma acc data copyout (b[0:N]) + { +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 
0; i < N; i++) + b[i] = i * 4; + } + } + +#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c new file mode 100644 index 0000000..945359f --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c @@ -0,0 +1,69 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + 
+#pragma acc enter data create (a[0:N]) +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } +#pragma acc exit data copyout (a[0:N]) + +#pragma acc enter data create (b[0:N]) +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } +#pragma acc exit data copyout (b[0:N]) + + +#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N]) +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } +#pragma acc exit data copyout (c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. 
*/ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c new file mode 100644 index 0000000..2d6e5e3 --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c @@ -0,0 +1,66 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. 
*/ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c new file mode 100644 index 0000000..c7aaf0f --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c @@ -0,0 +1,66 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc update device (b[0:N]) + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < 
N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only two loops are analyzed, and that both can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c new file mode 100644 index 0000000..46ca9c5 --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c @@ -0,0 +1,65 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N], b[0:N], c[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for 
(COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only three loops are analyzed, and that all can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c new file mode 100644 index 0000000..3e799ed --- /dev/null +++ gcc/testsuite/c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c @@ -0,0 +1,67 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-ftree-parallelize-loops=32" } */ +/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc parallel present (b[0:N]) + { +#pragma acc loop + 
for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + +/* Check that only two loops are analyzed, and that both can be + parallelized. */ +/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } */ +/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */ + +/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */ + +/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 new file mode 100644 index 0000000..1b75a23 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 @@ -0,0 +1,52 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc end data + + !$acc data copyout (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc end data + + !$acc data copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 new file mode 100644 index 0000000..4ba83b6 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 @@ -0,0 +1,52 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc exit data copyout (a(0:n-1)) + + !$acc enter data create (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc exit data copyout (b(0:n-1)) + + !$acc enter data copyin (a(0:n-1), b(0:n-1)) create (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc exit data copyout (c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 new file mode 100644 index 0000000..2b05b33 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 @@ -0,0 +1,50 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 new file mode 100644 index 0000000..b3c80dc --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95 @@ -0,0 +1,49 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + do i = 0, n -1 + b(i) = i * 4 + end do + + !$acc update device (b(0:n-1)) + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only two loops are analyzed, and that both can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 new file mode 100644 index 0000000..98c5e7a --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95 @@ -0,0 +1,50 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only three loops are analyzed, and that all can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 new file mode 100644 index 0000000..7ea2b49 --- /dev/null +++ gcc/testsuite/gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 @@ -0,0 +1,51 @@ +! { dg-additional-options "-O2" } +! { dg-additional-options "-ftree-parallelize-loops=32" } +! { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } +! 
{ dg-additional-options "-fdump-tree-optimized" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc parallel present (b(0:n-1)) + !$acc loop + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end parallel + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main + +! Check that only two loops are analyzed, and that both can be parallelized. +! { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops_oacc_kernels" } } +! { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } + +! Check that the loop has been split off into a function. +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.0 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } } +! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } } + +! { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } +! { dg-final { cleanup-tree-dump "optimized" } } diff --git libgomp/ChangeLog.gomp libgomp/ChangeLog.gomp index bcb3340..3d762bd 100644 --- libgomp/ChangeLog.gomp +++ libgomp/ChangeLog.gomp @@ -1,6 +1,30 @@ 2015-04-21 Tom de Vries <t...@codesourcery.com> Thomas Schwinge <tho...@codesourcery.com> + * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c: New + test. + * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c: + New test. + * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c: + New test. + * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c: + New test. 
+ * testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c: New + test. + * testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c: + New test. + * testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95: New + test. + * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95: + New test. + * testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95: + New test. + * testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95: New + test. + * testsuite/libgomp.oacc-fortran/kernels-loop-data.f95: New test. + * testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95: + New test. + * testsuite/libgomp.oacc-fortran/kernels-loop-2.f95: New test. * testsuite/libgomp.oacc-fortran/kernels-loop.f95: New test. diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c new file mode 100644 index 0000000..325ea7d --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-2.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + } + +#pragma acc data copyout (b[0:N]) + { +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + } + +#pragma acc data copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] 
+ b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c new file mode 100644 index 0000000..9c378a2 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit-2.c @@ -0,0 +1,54 @@ +/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N]) +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } +#pragma acc exit data copyout (a[0:N]) + +#pragma acc enter data create (b[0:N]) +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } +#pragma acc exit data copyout (b[0:N]) + + +#pragma acc enter data copyin (a[0:N], b[0:N]) create (c[0:N]) +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } +#pragma acc exit data copyout (c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c new file mode 100644 index 0000000..78cf4c1 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-enter-exit.c @@ -0,0 +1,51 @@ 
+/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c new file mode 100644 index 0000000..67c2c36 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data-update.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 
2; + } + + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc update device (b[0:N]) + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} + diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c new file mode 100644 index 0000000..acd7f30 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-loop-data.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc data copyout (a[0:N], b[0:N], c[0:N]) + { +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc kernels present (b[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + } + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c new file mode 100644 index 0000000..cab10df --- /dev/null +++ 
libgomp/testsuite/libgomp.oacc-c-c++-common/kernels-parallel-loop-data-enter-exit.c @@ -0,0 +1,52 @@ +/* { dg-do run } */ +/* { dg-options "-ftree-parallelize-loops=32 -O2" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +int +main (void) +{ + unsigned int *__restrict a; + unsigned int *__restrict b; + unsigned int *__restrict c; + + a = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + b = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + c = (unsigned int *__restrict)malloc (N * sizeof (unsigned int)); + +#pragma acc enter data create (a[0:N], b[0:N], c[0:N]) + +#pragma acc kernels present (a[0:N]) + { + for (COUNTERTYPE i = 0; i < N; i++) + a[i] = i * 2; + } + +#pragma acc parallel present (b[0:N]) + { +#pragma acc loop + for (COUNTERTYPE i = 0; i < N; i++) + b[i] = i * 4; + } + +#pragma acc kernels present (a[0:N], b[0:N], c[0:N]) + { + for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + +#pragma acc exit data copyout (a[0:N], b[0:N], c[0:N]) + + for (COUNTERTYPE i = 0; i < N; i++) + if (c[i] != a[i] + b[i]) + abort (); + + free (a); + free (b); + free (c); + + return 0; +} diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95 new file mode 100644 index 0000000..7b52253 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-2.f95 @@ -0,0 +1,38 @@ +! { dg-do run } +! 
{ dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc end data + + !$acc data copyout (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc end data + + !$acc data copyin (a(0:n-1), b(0:n-1)) copyout (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95 new file mode 100644 index 0000000..af98efa --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit-2.f95 @@ -0,0 +1,38 @@ +! { dg-do run } +! { dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1)) + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + !$acc exit data copyout (a(0:n-1)) + + !$acc enter data create (b(0:n-1)) + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + !$acc exit data copyout (b(0:n-1)) + + !$acc enter data copyin (a(0:n-1), b(0:n-1)) create (c(0:n-1)) + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + !$acc exit data copyout (c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. 
a(i) + b(i)) call abort + end do + +end program main diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95 new file mode 100644 index 0000000..bb6f8dc --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-enter-exit.f95 @@ -0,0 +1,36 @@ +! { dg-do run } +! { dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95 new file mode 100644 index 0000000..cab1f2c --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data-update.f95 @@ -0,0 +1,36 @@ +! { dg-do run } +! 
{ dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + do i = 0, n -1 + b(i) = i * 4 + end do + + !$acc update device (b(0:n-1)) + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data.f95 new file mode 100644 index 0000000..f26671d --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-loop-data.f95 @@ -0,0 +1,36 @@ +! { dg-do run } +! { dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc kernels present (b(0:n-1)) + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end kernels + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc end data + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main diff --git libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95 libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95 new file mode 100644 index 0000000..2322152 --- /dev/null +++ libgomp/testsuite/libgomp.oacc-fortran/kernels-parallel-loop-data-enter-exit.f95 @@ -0,0 +1,37 @@ +! { dg-do run } +! 
{ dg-options "-ftree-parallelize-loops=32" } + +program main + implicit none + integer, parameter :: n = 1024 + integer, dimension (0:n-1) :: a, b, c + integer :: i, ii + + !$acc enter data create (a(0:n-1), b(0:n-1), c(0:n-1)) + + !$acc kernels present (a(0:n-1)) + do i = 0, n - 1 + a(i) = i * 2 + end do + !$acc end kernels + + !$acc parallel present (b(0:n-1)) + !$acc loop + do i = 0, n -1 + b(i) = i * 4 + end do + !$acc end parallel + + !$acc kernels present (a(0:n-1), b(0:n-1), c(0:n-1)) + do ii = 0, n - 1 + c(ii) = a(ii) + b(ii) + end do + !$acc end kernels + + !$acc exit data copyout (a(0:n-1), b(0:n-1), c(0:n-1)) + + do i = 0, n - 1 + if (c(i) .ne. a(i) + b(i)) call abort + end do + +end program main Grüße, Thomas
signature.asc
Description: PGP signature