diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map
index fe38dc6..663c27c 100644
--- a/libgomp/libgomp.map
+++ b/libgomp/libgomp.map
@@ -318,6 +318,7 @@ GOACC_2.0 {
global:
GOACC_data_end;
GOACC_data_start;
+ GOACC_declare;
GOACC_enter_exit_data;
GOACC_parallel;
GOACC_update;
@@ -331,6 +332,7 @@ GOACC_2.0.GOMP_4_BRANCH {
GOACC_deviceptr;
GOACC_get_ganglocal_ptr;
GOACC_kernels;
+ GOACC_register_static;
} GOACC_2.0;
GOMP_PLUGIN_1.0 {
diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c
index 9f24dc3..e772f48 100644
--- a/libgomp/oacc-init.c
+++ b/libgomp/oacc-init.c
@@ -205,6 +205,8 @@ acc_shutdown_1 (acc_device_t d)
if (!base_dev)
gomp_fatal ("device %s not supported", name_of_acc_device_t (d));
+ goacc_deallocate_static (d);
+
gomp_mutex_lock (&goacc_thread_lock);
/* Free target-specific TLS data and close all devices. */
@@ -373,7 +375,9 @@ goacc_attach_host_thread_to_device (int ord)
void
acc_init (acc_device_t d)
{
- if (!cached_base_dev)
+ bool init = !cached_base_dev;
+
+ if (init)
gomp_init_targets_once ();
gomp_mutex_lock (&acc_device_lock);
@@ -381,6 +385,9 @@ acc_init (acc_device_t d)
cached_base_dev = acc_init_1 (d);
gomp_mutex_unlock (&acc_device_lock);
+
+ if (init)
+ goacc_allocate_static (d);
goacc_attach_host_thread_to_device (-1);
}
diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h
index 0ace737..8f4938e 100644
--- a/libgomp/oacc-int.h
+++ b/libgomp/oacc-int.h
@@ -98,6 +98,9 @@ void goacc_save_and_set_bind (acc_device_t);
void goacc_restore_bind (void);
void goacc_lazy_initialize (void);
+void goacc_allocate_static (acc_device_t);
+void goacc_deallocate_static (acc_device_t);
+
#ifdef HAVE_ATTRIBUTE_VISIBILITY
# pragma GCC visibility pop
#endif
diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c
index 513d0bc..70758bc 100644
--- a/libgomp/oacc-parallel.c
+++ b/libgomp/oacc-parallel.c
@@ -109,6 +109,68 @@ alloc_ganglocal_addrs (size_t mapnum, void **hostaddrs, size_t *sizes,
}
}
+static struct oacc_static
+{
+ void *addr;
+ size_t size;
+ unsigned short mask;
+ bool free;
+ struct oacc_static *next;
+} *oacc_statics;
+
+static bool alloc_done = false;
+
+void
+goacc_allocate_static (acc_device_t d)
+{
+ struct oacc_static *s;
+
+ if (alloc_done)
+ assert (0);
+
+ for (s = oacc_statics; s; s = s->next)
+ {
+ void *d;
+
+ switch (s->mask)
+ {
+ case GOMP_MAP_FORCE_ALLOC:
+ break;
+
+ case GOMP_MAP_FORCE_TO:
+ d = acc_deviceptr (s->addr);
+ acc_memcpy_to_device (d, s->addr, s->size);
+ break;
+
+ case GOMP_MAP_FORCE_DEVICEPTR:
+ case GOMP_MAP_DEVICE_RESIDENT:
+ case GOMP_MAP_LINK:
+ break;
+
+ default:
+ assert (0);
+ break;
+ }
+ }
+
+ alloc_done = true;
+}
+
+void
+goacc_deallocate_static (acc_device_t d)
+{
+ struct oacc_static *s;
+ unsigned short mask = GOMP_MAP_FORCE_DEALLOC;
+
+ if (!alloc_done)
+ return;
+
+ for (s = oacc_statics; s; s = s->next)
+ GOACC_enter_exit_data (d, 1, &s->addr, &s->size, &mask, 0, 0);
+
+ alloc_done = false;
+}
+
static void goacc_wait (int async, int num_waits, va_list ap);
void
@@ -592,3 +654,82 @@ GOACC_get_thread_num (int gang, int worker, int vector)
{
return 0;
}
+
+void
+GOACC_register_static (void *addr, int size, unsigned int mask)
+{
+ struct oacc_static *s;
+
+ s = (struct oacc_static *) malloc (sizeof (struct oacc_static));
+ s->addr = addr;
+ s->size = (size_t) size;
+ s->mask = mask;
+ s->free = false;
+ s->next = NULL;
+
+ if (oacc_statics)
+ s->next = oacc_statics;
+
+ oacc_statics = s;
+}
+
+#include <stdio.h>
+
+void
+GOACC_declare (int device, size_t mapnum,
+ void **hostaddrs, size_t *sizes, unsigned short *kinds)
+{
+ int i;
+
+ for (i = 0; i < mapnum; i++)
+ {
+ unsigned char kind = kinds[i] & 0xff;
+
+ if (kind == GOMP_MAP_POINTER || kind == GOMP_MAP_TO_PSET)
+ continue;
+
+ switch (kind)
+ {
+ case GOMP_MAP_FORCE_ALLOC:
+ case GOMP_MAP_FORCE_DEALLOC:
+ case GOMP_MAP_FORCE_FROM:
+ case GOMP_MAP_FORCE_TO:
+ case GOMP_MAP_POINTER:
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+ &kinds[i], 0, 0);
+ break;
+
+ case GOMP_MAP_FORCE_DEVICEPTR:
+ break;
+
+ case GOMP_MAP_ALLOC:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ {
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+ &kinds[i], 0, 0);
+ }
+ break;
+
+ case GOMP_MAP_TO:
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+ &kinds[i], 0, 0);
+
+ break;
+
+ case GOMP_MAP_FROM:
+ kinds[i] = GOMP_MAP_FORCE_FROM;
+ GOACC_enter_exit_data (device, 1, &hostaddrs[i], &sizes[i],
+ &kinds[i], 0, 0);
+ break;
+
+ case GOMP_MAP_FORCE_PRESENT:
+ if (!acc_is_present (hostaddrs[i], sizes[i]))
+ gomp_fatal ("[%p,%zd] is not mapped", hostaddrs[i], sizes[i]);
+ break;
+
+ default:
+ assert (0);
+ break;
+ }
+ }
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c++/declare-1.C b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C
new file mode 100644
index 0000000..268809b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c++/declare-1.C
@@ -0,0 +1,24 @@
+
+template<class T>
+T foo()
+{
+ T a;
+ #pragma acc declare create (a)
+
+ #pragma acc parallel
+ {
+ a = 5;
+ }
+
+ return a;
+}
+
+int
+main (void)
+{
+ int rc;
+
+ rc = foo<int>();
+
+ return rc;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c
new file mode 100644
index 0000000..59cfe51
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-1.c
@@ -0,0 +1,65 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <openacc.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+int b[8];
+#pragma acc declare create (b)
+
+int d[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
+#pragma acc declare copyin (d)
+
+int
+main (int argc, char **argv)
+{
+ const int N = 8;
+ int a[N];
+ int e[N];
+#pragma acc declare create (e)
+ int i;
+
+ for (i = 0; i < N; i++)
+ a[i] = i + 1;
+
+ if (!acc_is_present (&b, sizeof (b)))
+ abort ();
+
+ if (!acc_is_present (&d, sizeof (d)))
+ abort ();
+
+ if (!acc_is_present (&e, sizeof (e)))
+ abort ();
+
+#pragma acc parallel copyin (a[0:N])
+ {
+ for (i = 0; i < N; i++)
+ {
+ b[i] = a[i];
+ a[i] = b[i];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != i + 1)
+ abort ();
+ }
+
+#pragma acc parallel copy (a[0:N])
+ {
+ for (i = 0; i < N; i++)
+ {
+ e[i] = a[i] + d[i];
+ a[i] = e[i];
+ }
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ if (a[i] != (i + 1) * 2)
+ abort ();
+ }
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c
new file mode 100644
index 0000000..2078a33
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+
+#define N 16
+
+float c[N];
+#pragma acc declare device_resident (c)
+
+#pragma acc routine
+float
+subr2 (float a)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ c[i] = 2.0;
+
+ for (i = 0; i < N; i++)
+ a += c[i];
+
+ return a;
+}
+
+float b[N];
+#pragma acc declare copyin (b)
+
+#pragma acc routine
+float
+subr1 (float a)
+{
+ int i;
+
+ for (i = 0; i < N; i++)
+ a += b[i];
+
+ return a;
+}
+
+int
+main (int argc, char **argv)
+{
+ float a;
+ int i;
+
+ for (i = 0; i < 16; i++)
+ b[i] = 1.0;
+
+ a = 0.0;
+
+ a = subr1 (a);
+
+ if (a != 16.0)
+ abort ();
+
+ a = 0.0;
+
+ a = subr2 (a);
+
+ if (a != 32.0)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
new file mode 100644
index 0000000..c3a2187
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-3.c
@@ -0,0 +1,61 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+float *b;
+#pragma acc declare deviceptr (b)
+
+#pragma acc routine
+float *
+subr2 (void)
+{
+ return b;
+}
+
+float
+subr1 (float a)
+{
+ float b;
+#pragma acc declare present_or_copy (b)
+ float c;
+#pragma acc declare present_or_copyin (c)
+ float d;
+#pragma acc declare present_or_create (d)
+ float e;
+#pragma acc declare present_or_copyout (e)
+
+#pragma acc parallel copy (a)
+ {
+ b = a;
+ c = b;
+ d = c;
+ e = d;
+ a = e;
+ }
+
+ return a;
+}
+
+int
+main (int argc, char **argv)
+{
+ float a;
+ float *c;
+
+ a = 2.0;
+
+ a = subr1 (a);
+
+ if (a != 2.0)
+ abort ();
+
+ b = (float *) acc_malloc (sizeof (float));
+
+ c = subr2 ();
+
+ if (b != c)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c
new file mode 100644
index 0000000..84ec64f
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/declare-4.c
@@ -0,0 +1,27 @@
+/* { dg-do run { target openacc_nvidia_accel_selected } } */
+
+#include <stdlib.h>
+#include <openacc.h>
+
+float b;
+#pragma acc declare link (b)
+
+int
+main (int argc, char **argv)
+{
+ float a;
+
+ a = 2.0;
+
+#pragma acc parallel copy (a)
+ {
+ b = a;
+ a = 1.0;
+ a = a + b;
+ }
+
+ if (a != 3.0)
+ abort ();
+
+ return 0;
+}
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
index 0bab5bd..4d58e70 100644
--- a/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-1.f90
@@ -1,5 +1,10 @@
! { dg-do run { target openacc_nvidia_accel_selected } }
+module vars
+ integer z
+ !$acc declare create (z)
+end module vars
+
subroutine subr6 (a, d)
integer, parameter :: N = 8
integer :: i
@@ -200,6 +205,7 @@ subroutine subr0 (a, b, c, d)
end subroutine
program main
+ use vars
use openacc
integer, parameter :: N = 8
integer :: a(N)
@@ -212,6 +218,8 @@ program main
c(:) = 4
d(:) = 5
+ if (acc_is_present (z) .neqv. .true.) call abort
+
call subr0 (a, b, c, d)
call test (a, .false.)
@@ -226,4 +234,5 @@ program main
if (d(i) .ne. 16) call abort
end do
+
end program
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90
new file mode 100644
index 0000000..9b75aa1
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-2.f90
@@ -0,0 +1,14 @@
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+module globalvars
+ integer a
+ !$acc declare create (a)
+end module globalvars
+
+program test
+ use globalvars
+ use openacc
+
+ if (acc_is_present (a) .neqv. .true.) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90
new file mode 100644
index 0000000..79fc011
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-3.f90
@@ -0,0 +1,65 @@
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+module globalvars
+ real b
+ !$acc declare link (b)
+end module globalvars
+
+program test
+ use openacc
+
+ real a
+ real c
+ !$acc declare link (c)
+
+ if (acc_is_present (b) .neqv. .false.) call abort
+ if (acc_is_present (c) .neqv. .false.) call abort
+
+ a = 0.0
+ b = 1.0
+
+ !$acc parallel copy (a) copyin (b)
+ b = b + 4.0
+ a = b
+ !$acc end parallel
+
+ if (a .ne. 5.0) call abort
+
+ if (acc_is_present (b) .neqv. .false.) call abort
+
+ a = 0.0
+
+ !$acc parallel copy (a) create (b)
+ b = 4.0
+ a = b
+ !$acc end parallel
+
+ if (a .ne. 4.0) call abort
+
+ if (acc_is_present (b) .neqv. .false.) call abort
+
+ a = 0.0
+
+ !$acc parallel copy (a) copy (b)
+ b = 4.0
+ a = b
+ !$acc end parallel
+
+ if (a .ne. 4.0) call abort
+ if (b .ne. 4.0) call abort
+
+ if (acc_is_present (b) .neqv. .false.) call abort
+
+ a = 0.0
+
+ !$acc parallel copy (a) copy (b) copy (c)
+ b = 4.0
+ c = b
+ a = c
+ !$acc end parallel
+
+ if (a .ne. 4.0) call abort
+
+ if (acc_is_present (b) .neqv. .false.) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90
new file mode 100644
index 0000000..997c8ac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-4.f90
@@ -0,0 +1,27 @@
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+module vars
+ real b
+ !$acc declare create (b)
+end module vars
+
+program test
+ use vars
+ use openacc
+ real a
+
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ a = 2.0
+
+ !$acc parallel copy (a)
+ b = a
+ a = 1.0
+ a = a + b
+ !$acc end parallel
+
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ if (a .ne. 3.0) call abort
+
+end program test
diff --git a/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90 b/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90
new file mode 100644
index 0000000..d7c9bac
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-fortran/declare-5.f90
@@ -0,0 +1,28 @@
+! { dg-do run { target openacc_nvidia_accel_selected } }
+
+module vars
+ implicit none
+ real b
+ !$acc declare device_resident (b)
+end module vars
+
+program test
+ use vars
+ use openacc
+ real a
+
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ a = 2.0
+
+ !$acc parallel copy (a)
+ b = a
+ a = 1.0
+ a = a + b
+ !$acc end parallel
+
+ if (acc_is_present (b) .neqv. .true.) call abort
+
+ if (a .ne. 3.0) call abort
+
+end program test