The OpenACC runtime, specifically GOACC_parallel_keyed, is not
respecting the on-device offset of subarrays which may arise in present
data clauses. This is a problem when the base element of a subarray of
some variable specified in such a present data clause does not match the
base element of the subarray of the same variable that was copied or
created in an enclosing acc data region and/or acc enter data directive. E.g.
#pragma acc data copy (some_var[10:100])
{
#pragma acc parallel present (some_var[20:5])
}
Note how some_var has a base element of 20 in the acc parallel
construct, and a base element of 10 in the acc data construct.
Currently, GOACC_parallel_keyed is not adjusting the device address for
some_var; it's passing &some_var[10] to nvptx_exec, instead of
&some_var[20]. The fix here is to teach GOACC_parallel_keyed to add the
offset for some_var as determined by gomp_map_vars, which this patch does.
This issue was causing SPEC_ACCEL 304.olbm to generate bogus results and
fail during specdiff. I've applied this patch to gomp-4_0-branch.
Cesar
2016-08-23 Cesar Philippidis <[email protected]>
libgomp/
* oacc-parallel.c (GOACC_parallel_keyed): Add offset to devaddrs.
* testsuite/libgomp.oacc-c-c++-common/data_offset.c: New test.
* testsuite/libgomp.oacc-fortran/data_offset.f90: New test.
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 454b550..deab4b3 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -235,7 +235,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
{
if (tgt->list[i].key != NULL)
devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
- + tgt->list[i].key->tgt_offset);
+ + tgt->list[i].key->tgt_offset
+ + tgt->list[i].offset);
else
devaddrs[i] = NULL;
}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c
new file mode 100644
index 0000000..ccbbfca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c
@@ -0,0 +1,41 @@
+/* Test present data clauses in acc offloaded regions when the
+ subarray inside the present clause does not have the same base
+ offset value as the subarray in the enclosing acc data or acc enter
+ data variable. */
+
+#include <assert.h>
+
+void
+offset (int *data, int n)
+{
+ int i;
+ /* Store n into data[0..n-1] in an offloaded loop; that range is expected to be mapped already.  */
+#pragma acc parallel loop present (data[0:n])
+ for (i = 0; i < n; i++)
+ data[i] = n;
+}
+
+int
+main ()
+{
+ const int n = 30;
+ int data[n], i;
+ /* Fill the host array with a sentinel so untouched elements are detectable.  */
+ for (i = 0; i < n; i++)
+ data[i] = -1;
+ /* Map the whole array, then offload on a subarray that starts 10 elements in.  */
+#pragma acc data copy(data[0:n])
+ {
+ offset (data+10, 10);
+ }
+ /* Only data[10..19] may have changed, and each of those must now be 10.  */
+ for (i = 0; i < n; i++)
+ {
+ if (i < 10 || i >= 20)
+ assert (data[i] == -1);
+ else
+ assert (data[i] == 10);
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90
new file mode 100644
index 0000000..ff8ee39
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90
@@ -0,0 +1,43 @@
+! Test present data clauses in acc offloaded regions when the subarray
+! inside the present clause does not have the same base offset value
+! as the subarray in the enclosing acc data or acc enter data variable.
+
+program test
+ implicit none
+ ! n is the array size; [m, 2*m) is the index range test_data is expected to overwrite.
+ integer, parameter :: n = 30, m = 10
+ integer :: i
+ integer, allocatable :: data(:)
+ logical bounded
+
+ allocate (data(n))
+ ! Sentinel value so that untouched elements can be recognised afterwards.
+ data(:) = -1
+ ! Map only data(5:20); test_data names a section with a different first element.
+ !$acc data copy (data(5:20))
+ call test_data (data, n, m)
+ !$acc end data
+ ! Elements outside [m, 2*m) must still be -1; those inside must equal 10.
+ do i = 1, n
+ bounded = i < m .or. i >= m+m
+ if (bounded .and. (data(i) /= -1)) then
+ call abort
+ else if (.not. bounded .and. data(i) /= 10) then
+ call abort
+ end if
+ end do
+
+ deallocate (data)
+end program test
+
+subroutine test_data (data, n, m)
+ implicit none
+ ! Set data(m) .. data(2*m-1) to m in an offloaded loop; data has n elements.
+ integer :: n, m, data(n), i
+ ! Fortran sections are lower:upper, so name every element the loop writes.
+ !$acc parallel loop present (data(m:m+m-1))
+ do i = m, m+m-1
+ data(i) = m
+ end do
+ !$acc end parallel loop
+end subroutine test_data