The OpenACC runtime, specifically GOACC_parallel_keyed, is not respecting the on-device offset of subarrays which may arise in present data clauses. This is a problem when the base element of a subarray of some variable specified in such a present data clause does not match the base element of a subarray of the same variable that was copied or created in an enclosing acc data region and/or acc enter data directive. E.g.
  #pragma acc data copy (some_var[10:100])
  {
    #pragma acc parallel present (some_var[20:5])
  }

Note how some_var has a base element of 20 in the acc parallel construct, and a base element of 10 in the acc data construct. Currently, GOACC_parallel_keyed is not adjusting the device address for some_var; it's passing &some_var[10] to nvptx_exec, instead of &some_var[20]. The fix here is to teach GOACC_parallel_keyed to add the offset for some_var as determined by gomp_map_vars, which this patch does. This issue was causing SPEC_ACCEL 304.olbm to generate bogus results and fail during specdiff. I've applied this patch to gomp-4_0-branch. Cesar
2016-08-23 Cesar Philippidis <ce...@codesourcery.com> libgomp/ * oacc-parallel.c (GOACC_parallel_keyed): Add offset to devaddrs. * testsuite/libgomp.oacc-c-c++-common/data_offset.c: New test. * testsuite/libgomp.oacc-fortran/data_offset.f90: New test. diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 454b550..deab4b3 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -235,7 +235,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *), { if (tgt->list[i].key != NULL) devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start - + tgt->list[i].key->tgt_offset); + + tgt->list[i].key->tgt_offset + + tgt->list[i].offset); else devaddrs[i] = NULL; } diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c new file mode 100644 index 0000000..ccbbfca --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c @@ -0,0 +1,41 @@ +/* Test present data clauses in acc offloaded regions when the + subarray inside the present clause does not have the same base + offset value as the subarray in the enclosing acc data or acc enter + data variable. */ + +#include <assert.h> + +void +offset (int *data, int n) +{ + int i; + +#pragma acc parallel loop present (data[0:n]) + for (i = 0; i < n; i++) + data[i] = n; +} + +int +main () +{ + const int n = 30; + int data[n], i; + + for (i = 0; i < n; i++) + data[i] = -1; + +#pragma acc data copy(data[0:n]) + { + offset (data+10, 10); + } + + for (i = 0; i < n; i++) + { + if (i < 10 || i >= 20) + assert (data[i] == -1); + else + assert (data[i] == 10); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90 new file mode 100644 index 0000000..ff8ee39 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90 @@ -0,0 +1,43 @@ +! Test present data clauses in acc offloaded regions when the subarray +! 
inside the present clause does not have the same base offset value +! as the subarray in the enclosing acc data or acc enter data variable. + +program test + implicit none + + integer, parameter :: n = 30, m = 10 + integer :: i + integer, allocatable :: data(:) + logical bounded + + allocate (data(n)) + + data(:) = -1 + + !$acc data copy (data(5:20)) + call test_data (data, n, m) + !$acc end data + + do i = 1, n + bounded = i < m .or. i >= m+m + if (bounded .and. (data(i) /= -1)) then + call abort + else if (.not. bounded .and. data(i) /= 10) then + call abort + end if + end do + + deallocate (data) +end program test + +subroutine test_data (data, n, m) + implicit none + + integer :: n, m, data(n), i + + !$acc parallel loop present (data(m:m)) + do i = m, m+m-1 + data(i) = m + end do + !$acc end parallel loop +end subroutine test_data