https://gcc.gnu.org/g:3c0d30600e09e0cdd62b8eaa1ee046f93756f8be
commit 3c0d30600e09e0cdd62b8eaa1ee046f93756f8be Author: Julian Brown <jul...@codesourcery.com> Date: Tue Feb 26 15:55:23 2019 -0800 Don't mark OpenACC auto loops as independent inside acc parallel regions 2018-09-20 Cesar Philippidis <ce...@codesourcery.com> gcc/ * omp-low.cc (lower_oacc_head_mark): Don't mark OpenACC auto loops as independent inside acc parallel regions. gcc/testsuite/ * c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. * c-c++-common/goacc/loop-auto-2.c: Likewise. * gcc.dg/goacc/loop-processing-1.c: Likewise. * c-c++-common/goacc/loop-auto-3.c: New test. * gfortran.dg/goacc/loop-auto-1.f90: New test. libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case to conform to the new behavior of the auto clause in OpenACC 2.5. Diff: --- gcc/ChangeLog.omp | 5 ++ gcc/omp-low.cc | 6 +- gcc/testsuite/ChangeLog.omp | 9 +++ gcc/testsuite/c-c++-common/goacc/loop-auto-1.c | 50 ++++++------ gcc/testsuite/c-c++-common/goacc/loop-auto-2.c | 4 +- gcc/testsuite/c-c++-common/goacc/loop-auto-3.c | 78 +++++++++++++++++++ gcc/testsuite/gcc.dg/goacc/loop-processing-1.c | 2 +- gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 | 88 ++++++++++++++++++++++ libgomp/ChangeLog.omp | 5 ++ .../libgomp.oacc-c-c++-common/loop-auto-1.c | 20 ++--- 10 files changed, 227 insertions(+), 40 deletions(-) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index b305525df30..e2c65c4e789 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,8 @@ +2018-09-20 Cesar Philippidis <ce...@codesourcery.com> + + * omp-low.cc (lower_oacc_head_mark): Don't mark OpenACC auto + loops as independent inside acc parallel regions. + 2018-12-22 Cesar Philippidis <ce...@codesourcery.com> Julian Brown <jul...@codesourcery.com> Tobias Burnus <tob...@codesourcery.com> diff --git a/gcc/omp-low.cc b/gcc/omp-low.cc index e53e4e3a7db..a6da27934de 100644 --- a/gcc/omp-low.cc +++ b/gcc/omp-low.cc @@ -8545,8 +8545,10 @@ lower_oacc_head_mark (location_t loc, tree ddvar, tree clauses, else gcc_unreachable (); - /* In a parallel region, loops are implicitly INDEPENDENT. */ - if (!tgt || is_oacc_parallel_or_serial (tgt)) + /* In a parallel region, loops without auto and seq clauses are + implicitly INDEPENDENT. */ + if ((!tgt || is_oacc_parallel_or_serial (tgt)) + && !(tag & (OLF_SEQ | OLF_AUTO))) tag |= OLF_INDEPENDENT; /* Loops inside OpenACC 'kernels' decomposed parts' regions are expected to diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp index bb18fa23628..5f40504a53e 100644 --- a/gcc/testsuite/ChangeLog.omp +++ b/gcc/testsuite/ChangeLog.omp @@ -1,3 +1,12 @@ +2018-09-20 Cesar Philippidis <ce...@codesourcery.com> + + * c-c++-common/goacc/loop-auto-1.c: Adjust test case to conform to + the new behavior of the auto clause in OpenACC 2.5. + * c-c++-common/goacc/loop-auto-2.c: Likewise. + * gcc.dg/goacc/loop-processing-1.c: Likewise. + * c-c++-common/goacc/loop-auto-3.c: New test. + * gfortran.dg/goacc/loop-auto-1.f90: New test. + 2018-10-05 Nathan Sidwell <nat...@acm.org> Tom de Vries <tdevr...@suse.de> Thomas Schwinge <tho...@codesourcery.com> diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c index 124befc4002..dcad07f11c8 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-1.c @@ -10,7 +10,7 @@ void Foo () #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -20,7 +20,7 @@ void Foo () #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -51,7 +51,7 @@ void Foo () #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -64,27 +64,27 @@ void Foo () } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int kx = 0; kx < 10; kx++) {} } } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int lx = 0; lx < 10; lx++) {} } } @@ -101,7 +101,7 @@ void Gang (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -111,7 +111,7 @@ void Gang (void) #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -142,7 +142,7 @@ void Gang (void) #pragma acc loop vector for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int kx = 0; kx < 10; kx++) {} } @@ -176,7 +176,7 @@ void Worker (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } @@ -186,7 +186,7 @@ void Worker (void) #pragma acc loop auto for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { #pragma acc loop vector @@ -194,20 +194,20 @@ void Worker (void) } } -#pragma acc loop auto +#pragma acc loop for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop for (int jx = 0; jx < 10; jx++) {} } -#pragma acc loop auto +#pragma acc loop for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) { -#pragma acc loop auto +#pragma acc loop for (int kx = 0; kx < 10; kx++) {} } } @@ -222,17 +222,17 @@ void Vector (void) #pragma acc loop seq for (int jx = 0; jx < 10; jx++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) {} } -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 10; ix++) {} -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 10; jx++) {} } } @@ -240,6 +240,6 @@ void Vector (void) #pragma acc routine seq void Seq (void) { -#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) {} } diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c index af3f0bddf2c..5aa36e93ab8 100644 --- a/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-2.c @@ -72,12 +72,12 @@ void Bad () #pragma acc loop tile(*) gang vector for (int ix = 0; ix < 10; ix++) { - #pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ + #pragma acc loop auto independent /* { dg-warning "insufficient partitioning" } */ for (int jx = 0; jx < 10; jx++) ; } -#pragma acc loop tile(*) auto /* { dg-warning "insufficient partitioning" } */ +#pragma acc loop tile(*) auto independent /* { dg-warning "insufficient partitioning" } */ for (int ix = 0; ix < 10; ix++) { #pragma acc loop worker diff --git a/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c new file mode 100644 index 00000000000..8f79ead16aa --- /dev/null +++ b/gcc/testsuite/c-c++-common/goacc/loop-auto-3.c @@ -0,0 +1,78 @@ +/* Ensure that the auto clause falls back to seq parallelism when the + OpenACC loop is not explicitly independent. */ + +/* { dg-additional-options "-fopt-info-optimized-omp" } */ + +void +test () +{ + int i, j, k, l, n = 100; + +#pragma acc parallel loop auto /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (i = 0; i < n; i++) +#pragma acc loop auto independent /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop worker vector /* { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } */ + for (k = 0; k < n; k++) + ; + +#pragma acc parallel loop auto independent /* { dg-message "optimized: assigned OpenACC gang worker loop parallelism" } */ + for (i = 0; i < n; i++) +#pragma acc loop auto /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop auto /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (k = 0; k < n; k++) +#pragma acc loop auto independent /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (l = 0; l < n; l++) + ; + +#pragma acc parallel loop gang /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */ + for (i = 0; i < n; i++) +#pragma acc loop worker /* { dg-message "optimized: assigned OpenACC worker loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop vector /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (k = 0; k < n; k++) + { +#pragma acc loop auto independent /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + /* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } */ + for (l = 0; l < n; l++) + ; +#pragma acc loop auto /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (l = 0; l < n; l++) + ; + } + +#pragma acc parallel loop /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + /* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } */ + for (i = 0; i < n; i++) + { +#pragma acc loop gang worker /* { dg-message "optimized: assigned OpenACC gang worker loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop auto /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + for (k = 0; k < n; k++) + { +#pragma acc loop vector /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (l = 0; l < n; l++) + ; +#pragma acc loop auto independent /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (l = 0; l < n; l++) + ; + } +#pragma acc loop worker /* { dg-message "optimized: assigned OpenACC worker loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop vector /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (k = 0; k < n; k++) + ; + } + +#pragma acc parallel loop /* { dg-message "optimized: assigned OpenACC gang loop parallelism" } */ + for (i = 0; i < n; i++) +#pragma acc loop /* { dg-message "optimized: assigned OpenACC worker loop parallelism" } */ + for (j = 0; j < n; j++) +#pragma acc loop /* { dg-message "optimized: assigned OpenACC seq loop parallelism" } */ + /* { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } */ + for (k = 0; k < n; k++) +#pragma acc loop /* { dg-message "optimized: assigned OpenACC vector loop parallelism" } */ + for (l = 0; l < n; l++) + ; +} diff --git a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c index d7447fd9419..6e034d17496 100644 --- a/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c +++ b/gcc/testsuite/gcc.dg/goacc/loop-processing-1.c @@ -9,7 +9,7 @@ void vector_1 (int *ary, int size) { #pragma acc loop gang for (int jx = 0; jx < 1; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < size; ix++) ary[ix] = place (); } diff --git a/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 new file mode 100644 index 00000000000..8d600f49c14 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/goacc/loop-auto-1.f90 @@ -0,0 +1,88 @@ +! Ensure that the auto clause falls back to seq parallelism when the +! OpenACC loop is not explicitly independent. + +! { dg-additional-options "-fopt-info-optimized-omp" } + +program test + implicit none + integer, parameter :: n = 100 + integer i, j, k, l + + !$acc parallel loop auto ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do i = 1, n + !$acc loop auto independent ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } + do j = 1, n + !$acc loop worker vector ! { dg-message "optimized: assigned OpenACC worker vector loop parallelism" } + do k = 1, n + end do + end do + end do + + !$acc parallel loop auto independent ! { dg-message "optimized: assigned OpenACC gang worker loop parallelism" } + do i = 1, n + !$acc loop auto ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do j = 1, n + !$acc loop auto ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do k = 1, n + !$acc loop auto independent ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do l = 1, n + end do + end do + end do + end do + + !$acc parallel loop gang ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } + do i = 1, n + !$acc loop worker ! { dg-message "optimized: assigned OpenACC worker loop parallelism" } + do j = 1, n + !$acc loop vector ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do k = 1, n + !$acc loop auto independent ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + ! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } + do l = 1, n + end do + !$acc loop auto ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do l = 1, n + end do + end do + end do + end do + + + !$acc parallel loop ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + ! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } + do i = 1, n + !$acc loop gang worker ! { dg-message "optimized: assigned OpenACC gang worker loop parallelism" } + do j = 1, n + !$acc loop auto ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + do k = 1, n + !$acc loop vector ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do l = 1, n + end do + end do + !$acc loop auto independent ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do l = 1, n + end do + end do + !$acc loop worker ! { dg-message "optimized: assigned OpenACC worker loop parallelism" } + do j = 1, n + !$acc loop vector ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do k = 1, n + end do + end do + end do + + !$acc parallel loop ! { dg-message "optimized: assigned OpenACC gang loop parallelism" } + do i = 1, n + !$acc loop ! { dg-message "optimized: assigned OpenACC worker loop parallelism" } + do j = 1, n + !$acc loop ! { dg-message "optimized: assigned OpenACC seq loop parallelism" } + ! { dg-warning "insufficient partitioning available to parallelize loop" "" { target *-*-* } .-1 } + do k = 1, n + !$acc loop ! { dg-message "optimized: assigned OpenACC vector loop parallelism" } + do l = 1, n + end do + end do + end do + end do +end program test diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp index cbc1a0965b7..022888babdd 100644 --- a/libgomp/ChangeLog.omp +++ b/libgomp/ChangeLog.omp @@ -1,3 +1,8 @@ +2018-09-20 Cesar Philippidis <ce...@codesourcery.com> + + * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust test case + to conform to the new behavior of the auto clause in OpenACC 2.5. + 2018-12-22 Cesar Philippidis <ce...@codesourcery.com> Julian Brown <jul...@codesourcery.com> diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c index c13cab780cb..41827559297 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c @@ -107,7 +107,7 @@ int vector_1 (int *ary, int size) { #pragma acc loop gang for (int jx = 0; jx < 1; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < size; ix++) ary[ix] = place (); } @@ -123,7 +123,7 @@ int vector_2 (int *ary, int size) { #pragma acc loop worker for (int jx = 0; jx < size / 64; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 64; ix++) ary[ix + jx * 64] = place (); } @@ -139,7 +139,7 @@ int worker_1 (int *ary, int size) { #pragma acc loop gang for (int kx = 0; kx < 1; kx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) #pragma acc loop vector for (int ix = 0; ix < 64; ix++) @@ -156,7 +156,7 @@ int gang_1 (int *ary, int size) #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) /* { dg-warning "region is vector partitioned but does not contain vector partitioned code" "" { target *-*-* } .-1 } */ { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) #pragma acc loop worker for (int ix = 0; ix < 64; ix++) @@ -172,11 +172,11 @@ int gang_2 (int *ary, int size) #pragma acc parallel num_gangs (32) num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int kx = 0; kx < size / (32 * 32); kx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < 32; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 32; ix++) ary[ix + jx * 32 + kx * 32 * 32] = place (); } @@ -190,9 +190,9 @@ int gang_3 (int *ary, int size) #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size / 64; jx++) -#pragma acc loop auto +#pragma acc loop auto independent for (int ix = 0; ix < 64; ix++) ary[ix + jx * 64] = place (); } @@ -206,7 +206,7 @@ int gang_4 (int *ary, int size) #pragma acc parallel vector_length(32) copy(ary[0:size]) firstprivate (size) { -#pragma acc loop auto +#pragma acc loop auto independent for (int jx = 0; jx < size; jx++) ary[jx] = place (); }