The OpenACC runtime, specifically GOACC_parallel_keyed, is not
respecting the on-device offset of subarrays which may arise in present
data clauses. This is a problem when the base element of a subarray of some
variable specified in such a present data clause does not match the base
element for a subarray of the same variable that was copied or created
in an enclosing acc data region and/or acc enter data directive. E.g.

  #pragma acc data copy (some_var[10:100])
  {
     #pragma acc parallel present (some_var[20:5])
  }

Note how some_var has a base element of 20 in the acc parallel
construct, and a base element of 10 in the acc data construct.
Currently, GOACC_parallel_keyed is not adjusting the device address for
some_var; it's passing &some_var[10] to nvptx_exec, instead of
&some_var[20]. The fix here is to teach GOACC_parallel_keyed to add the
offset for some_var as determined by gomp_map_vars, which this patch does.

This issue was causing SPEC_ACCEL 304.olbm to generate bogus results and
fail during specdiff. I've applied this patch to gomp-4_0-branch.

Cesar
2016-08-23  Cesar Philippidis  <ce...@codesourcery.com>

	libgomp/
	* oacc-parallel.c (GOACC_parallel_keyed): Add offset to devaddrs.
	* testsuite/libgomp.oacc-c-c++-common/data_offset.c: New test.
	* testsuite/libgomp.oacc-fortran/data_offset.f90: New test.


diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 454b550..deab4b3 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -235,7 +235,8 @@ GOACC_parallel_keyed (int device, void (*fn) (void *),
     {
       if (tgt->list[i].key != NULL)
 	devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
-				+ tgt->list[i].key->tgt_offset);
+				+ tgt->list[i].key->tgt_offset
+				+ tgt->list[i].offset);
       else
 	devaddrs[i] = NULL;
     }
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c
new file mode 100644
index 0000000..ccbbfca
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/data_offset.c
@@ -0,0 +1,41 @@
+/* Test present data clauses in acc offloaded regions when the
+   subarray inside the present clause does not have the same base
+   offset value as the subarray in the enclosing acc data or acc enter
+   data variable.  */
+
+#include <assert.h>
+
+void
+offset (int *data, int n)
+{
+  int i;
+
+#pragma acc parallel loop present (data[0:n])
+  for (i = 0; i < n; i++)
+    data[i] = n;
+}
+
+int
+main ()
+{
+  const int n = 30;
+  int data[n], i;
+
+  for (i = 0; i < n; i++)
+    data[i] = -1;
+
+#pragma acc data copy(data[0:n])
+  {
+    offset (data+10, 10);
+  }
+
+  for (i = 0; i < n; i++)
+    {
+      if (i < 10 || i >= 20)
+	assert (data[i] == -1);
+      else
+	assert (data[i] == 10);
+    }
+
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90 b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90
new file mode 100644
index 0000000..ff8ee39
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/data_offset.f90
@@ -0,0 +1,43 @@
+! Test present data clauses in acc offloaded regions when the subarray
+! inside the present clause does not have the same base offset value
+! as the subarray in the enclosing acc data or acc enter data variable.
+
+program test
+  implicit none
+
+  integer, parameter :: n = 30, m = 10
+  integer :: i
+  integer, allocatable :: data(:)
+  logical bounded
+
+  allocate (data(n))
+
+  data(:) = -1
+
+  !$acc data copy (data(5:20))
+  call test_data (data, n, m)
+  !$acc end data
+
+  do i = 1, n
+     bounded = i < m .or. i >= m+m
+     if (bounded .and. (data(i) /= -1)) then
+        call abort
+     else if (.not. bounded .and. data(i) /= 10) then
+        call abort
+     end if
+  end do
+
+  deallocate (data)
+end program test
+
+subroutine test_data (data, n, m)
+  implicit none
+
+  integer :: n, m, data(n), i
+
+  !$acc parallel loop present (data(m:m))
+  do i = m, m+m-1
+     data(i) = m
+  end do
+  !$acc end parallel loop
+end subroutine test_data

Reply via email to