Re: [gomp4, committed] Implement -foffload-alias
On 04/11/15 09:47, Thomas Schwinge wrote: > +/* Check that the loop has been split off into a function. */ > +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 "optimized" } } */ For C we get: ;; Function foo._omp_fn.0 (foo._omp_fn.0, funcdef_no=12, decl_uid=2534, cgraph_uid=14, symbol_order=14) ..., so that matches, but for C++ we get: ;; Function foo(unsigned int*, unsigned int*, unsigned int*) [clone ._omp_fn.0] (_ZL3fooPjS_S_._omp_fn.0, funcdef_no=12, decl_uid=2416, cgraph_uid=14, symbol_order=14) ..., which doesn't match, so this directive FAILs. Hi Thomas, thanks for noticing. Fixed as attached. Committed to gomp-4_0-branch. Thanks, - Tom Fixup goacc/kernels-loop-offload-alias-none.c 2015-11-04 Tom de Vries * c-c++-common/goacc/kernels-loop-offload-alias-none.c: Fix foo._omp_fn.0 function name scanning. --- gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c index bb96330..79d8daa 100644 --- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c @@ -49,7 +49,7 @@ main (void) } /* Check that the loop has been split off into a function. */ -/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo.*\\._omp_fn\\.0" 1 "optimized" } } */ /* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */ /* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */ -- 1.9.1
Re: [gomp4, committed] Implement -foffload-alias
Hi Tom! On Tue, 3 Nov 2015 15:33:17 +0100, Tom de Vries wrote: > On 03/11/15 15:19, Tom de Vries wrote: > > I've dropped the two testcases from this patch, I'll commit in a > > follow-up patch. > > Committed to gomp-4_0-branch, as attached. > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c > @@ -0,0 +1,61 @@ > +/* { dg-additional-options "-O2" } */ > +/* { dg-additional-options "-fdump-tree-optimized" } */ > +/* { dg-additional-options "-fdump-tree-alias-all" } */ > +/* { dg-additional-options "-foffload-alias=none" } */ > + > +#include <stdlib.h> > + > +#define N (1024 * 512) > +#define COUNTERTYPE unsigned int > + > +static void > +foo (unsigned int *a, unsigned int *b, unsigned int *c) > +{ > + for (COUNTERTYPE i = 0; i < N; i++) > +a[i] = i * 2; > + > + for (COUNTERTYPE i = 0; i < N; i++) > +b[i] = i * 4; > + > +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) > + { > +for (COUNTERTYPE ii = 0; ii < N; ii++) > + c[ii] = a[ii] + b[ii]; > + } > + > + for (COUNTERTYPE i = 0; i < N; i++) > +if (c[i] != a[i] + b[i]) > + abort (); > +} > + > +int > +main (void) > +{ > + unsigned int *a; > + unsigned int *b; > + unsigned int *c; > + > + a = (unsigned int *)malloc (N * sizeof (unsigned int)); > + b = (unsigned int *)malloc (N * sizeof (unsigned int)); > + c = (unsigned int *)malloc (N * sizeof (unsigned int)); > + > + foo (a, b, c); > + > + free (a); > + free (b); > + free (c); > + > + return 0; > +} > + > +/* Check that the loop has been split off into a function. 
*/ > +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 > "optimized" } } */ For C we get: ;; Function foo._omp_fn.0 (foo._omp_fn.0, funcdef_no=12, decl_uid=2534, cgraph_uid=14, symbol_order=14) ..., so that matches, but for C++ we get: ;; Function foo(unsigned int*, unsigned int*, unsigned int*) [clone ._omp_fn.0] (_ZL3fooPjS_S_._omp_fn.0, funcdef_no=12, decl_uid=2416, cgraph_uid=14, symbol_order=14) ..., which doesn't match, so this directive FAILs. > + > +/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 6" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 7" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 9 "alias" } } */ > --- /dev/null > +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c > @@ -0,0 +1,44 @@ > +/* { dg-additional-options "-O2" } */ > +/* { dg-additional-options "-fdump-tree-optimized" } */ > +/* { dg-additional-options "-fdump-tree-alias-all" } */ > +/* { dg-additional-options "-foffload-alias=pointer" } */ > + > +#include <stdlib.h> > + > +#define N (1024 * 512) > +#define COUNTERTYPE unsigned int > + > +unsigned int a[N]; > +unsigned int b[N]; > +unsigned int c[N]; > + > +int > +main (void) > +{ > + for (COUNTERTYPE i = 0; i < N; i++) > +a[i] = i * 2; > + > + for (COUNTERTYPE i = 0; i < N; i++) > +b[i] = i * 4; > + > +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) > + { > +for (COUNTERTYPE ii = 0; ii < N; ii++) > + c[ii] = a[ii] + b[ii]; > + } > + > + for (COUNTERTYPE i = 0; i < N; i++) > +if (c[i] != a[i] + b[i]) > + abort (); > + > + return 0; > +} > + > +/* Check that 
the loop has been split off into a function. */ > +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 > "optimized" } } */ This works for both C and C++. > + > +/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */ > +/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 6 "alias" } } */ Grüße Thomas signature.asc Description: PGP signature
Re: [gomp4, committed] Implement -foffload-alias
On 03/11/15 15:19, Tom de Vries wrote: I've dropped the two testcases from this patch, I'll commit in a follow-up patch. Committed to gomp-4_0-branch, as attached. Thanks, - Tom Add goacc/kernels-loop-offload-alias-{none,ptr}.c 2015-11-03 Tom de Vries * c-c++-common/goacc/kernels-loop-offload-alias-none.c: New test. * c-c++-common/goacc/kernels-loop-offload-alias-ptr.c: New test. --- .../goacc/kernels-loop-offload-alias-none.c| 61 ++ .../goacc/kernels-loop-offload-alias-ptr.c | 44 2 files changed, 105 insertions(+) create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c create mode 100644 gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c new file mode 100644 index 000..bb96330 --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-none.c @@ -0,0 +1,61 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ +/* { dg-additional-options "-fdump-tree-alias-all" } */ +/* { dg-additional-options "-foffload-alias=none" } */ + +#include <stdlib.h> + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +static void +foo (unsigned int *a, unsigned int *b, unsigned int *c) +{ + for (COUNTERTYPE i = 0; i < N; i++) +a[i] = i * 2; + + for (COUNTERTYPE i = 0; i < N; i++) +b[i] = i * 4; + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < N; i++) +if (c[i] != a[i] + b[i]) + abort (); +} + +int +main (void) +{ + unsigned int *a; + unsigned int *b; + unsigned int *c; + + a = (unsigned int *)malloc (N * sizeof (unsigned int)); + b = (unsigned int *)malloc (N * sizeof (unsigned int)); + c = (unsigned int *)malloc (N * sizeof (unsigned int)); + + foo (a, b, c); + + free (a); + free (b); + free (c); + + return 0; +} + 
+/* Check that the loop has been split off into a function. */ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*foo._omp_fn.0" 1 "optimized" } } */ + +/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 5" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 6" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 7" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 9 "alias" } } */ diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c new file mode 100644 index 000..de4f45a --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-offload-alias-ptr.c @@ -0,0 +1,44 @@ +/* { dg-additional-options "-O2" } */ +/* { dg-additional-options "-fdump-tree-optimized" } */ +/* { dg-additional-options "-fdump-tree-alias-all" } */ +/* { dg-additional-options "-foffload-alias=pointer" } */ + +#include + +#define N (1024 * 512) +#define COUNTERTYPE unsigned int + +unsigned int a[N]; +unsigned int b[N]; +unsigned int c[N]; + +int +main (void) +{ + for (COUNTERTYPE i = 0; i < N; i++) +a[i] = i * 2; + + for (COUNTERTYPE i = 0; i < N; i++) +b[i] = i * 4; + +#pragma acc kernels copyin (a[0:N], b[0:N]) copyout (c[0:N]) + { +for (COUNTERTYPE ii = 0; ii < N; ii++) + c[ii] = a[ii] + b[ii]; + } + + for (COUNTERTYPE i = 0; i < N; i++) +if (c[i] != a[i] + b[i]) + abort (); + + return 0; +} + +/* Check that the loop has been split off into a function. 
*/ +/* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */ + +/* { dg-final { scan-tree-dump-times "clique 1 base 1" 3 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 2" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 3" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "clique 1 base 4" 1 "alias" } } */ +/* { dg-final { scan-tree-dump-times "(?n)clique .* base .*" 6 "alias" } } */ -- 1.9.1
[gomp4, committed] Implement -foffload-alias
[ was: Re: [gomp4, WIP] Implement -foffload-alias ] On 28/09/15 17:38, Tom de Vries wrote: Hi, this work-in-progress patch implements a new option -foffload-alias=. The option -foffload-alias=none instructs the compiler to assume that object references and pointer dereferences in an offload region do not alias. The option -foffload-alias=pointer instructs the compiler to assume that object references in an offload region do not alias. The option -foffload-alias=all instructs the compiler to make no assumptions about aliasing in offload regions. The default value is -foffload-alias=none. The patch works by adding restrict to the types of the fields used to pass data to an offloading region. Updated patch attached, committed to gomp-4_0-branch. At the moment, the kernels-loop-offload-alias-ptr.c test-case passes, but the kernels-loop-offload-alias-none.c test-case fails. I've dropped the two testcases from this patch, I'll commit in a follow-up patch. For the latter, the required amount of restrict is added, but it has no effect. I've reported this in a more basic form in PR67742: "3rd-level restrict ignored". I've committed a fix for that PR as reported here: https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00204.html . Furthermore, I've added support for the option in the 'mask & 4' case in install_var_field, I ran into this when trying out some Fortran test-cases. Thanks, - Tom Implement -foffload-alias 2015-09-28 Tom de Vries * common.opt (foffload-alias): New option. * flag-types.h (enum offload_alias): New enum. * omp-low.c (install_var_field): Handle flag_offload_alias. * doc/invoke.texi (@item Code Generation Options): Add -foffload-alias. (@item -foffload-alias): New item. 
--- gcc/common.opt | 16 gcc/doc/invoke.texi | 11 +++ gcc/flag-types.h| 7 +++ gcc/omp-low.c | 28 ++-- 4 files changed, 60 insertions(+), 2 deletions(-) diff --git a/gcc/common.opt b/gcc/common.opt index c85ab49..135e777 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1738,6 +1738,22 @@ Enum(offload_abi) String(ilp32) Value(OFFLOAD_ABI_ILP32) EnumValue Enum(offload_abi) String(lp64) Value(OFFLOAD_ABI_LP64) +foffload-alias= +Common Joined RejectNegative Enum(offload_alias) Var(flag_offload_alias) Init(OFFLOAD_ALIAS_NONE) +-foffload-alias=[all|pointer|none] Assume non-aliasing in an offload region + +Enum +Name(offload_alias) Type(enum offload_alias) UnknownError(unknown offload aliasing %qs) + +EnumValue +Enum(offload_alias) String(all) Value(OFFLOAD_ALIAS_ALL) + +EnumValue +Enum(offload_alias) String(pointer) Value(OFFLOAD_ALIAS_POINTER) + +EnumValue +Enum(offload_alias) String(none) Value(OFFLOAD_ALIAS_NONE) + fomit-frame-pointer Common Report Var(flag_omit_frame_pointer) Optimization When possible do not generate stack frames. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 5a07512..8967f88 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1142,6 +1142,7 @@ See S/390 and zSeries Options. -finstrument-functions-exclude-function-list=@var{sym},@var{sym},@dots{} @gol -finstrument-functions-exclude-file-list=@var{file},@var{file},@dots{} @gol -fno-common -fno-ident @gol +-foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]} @gol -fpcc-struct-return -fpic -fPIC -fpie -fPIE -fno-plt @gol -fno-jump-tables @gol -frecord-gcc-switches @gol @@ -23842,6 +23843,16 @@ The options @option{-ftrapv} and @option{-fwrapv} override each other, so using using @option{-ftrapv} @option{-fwrapv} @option{-fno-wrapv} on the command-line results in @option{-ftrapv} being effective. 
+@item -foffload-alias=@r{[}none@r{|}pointer@r{|}all@r{]} +@opindex -foffload-alias +The option @option{-foffload-alias=none} instructs the compiler to assume that +objects references and pointer dereferences in an offload region do not alias. +The option @option{-foffload-alias=pointer} instruct the compiler to assume that +objects references in an offload region do not alias. The option +@option{-foffload-alias=all} instructs the compiler to make no assumptions about +aliasing in offload regions. The default value is +@option{-foffload-alias=none}. + @item -fexceptions @opindex fexceptions Enable exception handling. Generates extra code needed to propagate diff --git a/gcc/flag-types.h b/gcc/flag-types.h index 6301cea..87b1677 100644 --- a/gcc/flag-types.h +++ b/gcc/flag-types.h @@ -293,5 +293,12 @@ enum gfc_convert GFC_FLAG_CONVERT_LITTLE }; +enum offload_alias +{ + OFFLOAD_ALIAS_ALL, + OFFLOAD_ALIAS_POINTER, + OFFLOAD_ALIAS_NONE +}; + #endif /* ! GCC_FLAG_TYPES_H */ diff --git a/gcc/omp-low.c b/gcc/omp-low.c index 3543785..6bac074 100644 --- a/gcc/omp-low.c +++ b/gcc/omp-low.c @@ -1441,6 +1441,14 @@ install_var_field (tree var, bool by_ref, int mask, omp_context *ctx) tree field, type, sfield = NULL_TREE; splay_tree_key key = (splay_tree_key) var; + /*