On 2018/9/25 9:10 PM, Chung-Lin Tang wrote:
Hi Thomas,
These are the OpenACC-specific changes, mostly the re-implementation of the
async-related acc_* runtime library API functions to use the new backend
plugin interfaces, in a non-target-specific way.


Hi Thomas,
This part includes some of the lookup_goacc_asyncqueue fixes we talked about.
I am still thinking about how the queue lock problem should really be solved,
so regard this patch as just fixing some of the problems.


diff -ru trunk-orig/libgomp/oacc-async.c trunk-work/libgomp/oacc-async.c
--- trunk-orig/libgomp/oacc-async.c     2018-12-14 22:11:29.252251925 +0800
+++ trunk-work/libgomp/oacc-async.c     2018-12-18 22:19:51.923102938 +0800
@@ -70,12 +70,16 @@
 
   struct gomp_device_descr *dev = thr->dev;
 
+  gomp_mutex_lock (&dev->openacc.async.lock);
+
   if (!create
       && (async >= dev->openacc.async.nasyncqueue
          || !dev->openacc.async.asyncqueue[async]))
-    return NULL;
+    {
+      gomp_mutex_unlock (&dev->openacc.async.lock);
+      return NULL;
+    }
 
-  gomp_mutex_lock (&dev->openacc.async.lock);
   if (async >= dev->openacc.async.nasyncqueue)
     {
       int diff = async + 1 - dev->openacc.async.nasyncqueue;
@@ -91,6 +95,12 @@
     {
       dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func 
();
 
+      if (!dev->openacc.async.asyncqueue[async])
+       {
+         gomp_mutex_unlock (&dev->openacc.async.lock);
+         gomp_fatal ("async %d creation failed", async);
+       }
+      
       /* Link new async queue into active list.  */
       goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
       n->aq = dev->openacc.async.asyncqueue[async];
diff -ru trunk-orig/libgomp/oacc-host.c trunk-work/libgomp/oacc-host.c
--- trunk-orig/libgomp/oacc-host.c      2018-12-14 18:31:07.487203770 +0800
+++ trunk-work/libgomp/oacc-host.c      2018-12-18 22:23:26.771807667 +0800
@@ -266,6 +266,9 @@
 
       .exec_func = host_openacc_exec,
 
+      .create_thread_data_func = host_openacc_create_thread_data,
+      .destroy_thread_data_func = host_openacc_destroy_thread_data,
+
       .async = {
        .construct_func = host_openacc_async_construct,
        .destruct_func = host_openacc_async_destruct,
@@ -278,9 +281,6 @@
        .host2dev_func = host_openacc_async_host2dev,
       },
 
-      .create_thread_data_func = host_openacc_create_thread_data,
-      .destroy_thread_data_func = host_openacc_destroy_thread_data,
-
       .cuda = {
        .get_current_device_func = NULL,
        .get_current_context_func = NULL,
diff -ru trunk-orig/libgomp/oacc-plugin.c trunk-work/libgomp/oacc-plugin.c
--- trunk-orig/libgomp/oacc-plugin.c    2018-12-14 18:31:07.491203745 +0800
+++ trunk-work/libgomp/oacc-plugin.c    2018-12-18 22:27:46.047722004 +0800
@@ -30,6 +30,13 @@
 #include "oacc-plugin.h"
 #include "oacc-int.h"
 
+void
+GOMP_PLUGIN_async_unmap_vars (void *ptr __attribute__((unused)),
+                             int async __attribute__((unused)))
+{
+  gomp_fatal ("invalid plugin function");
+}
+
 /* Return the target-specific part of the TLS data for the current thread.  */
 
 void *
diff -ru trunk-orig/libgomp/plugin/plugin-nvptx.c 
trunk-work/libgomp/plugin/plugin-nvptx.c
Index: libgomp/oacc-async.c
===================================================================
--- libgomp/oacc-async.c        (revision 267226)
+++ libgomp/oacc-async.c        (working copy)
@@ -27,10 +27,97 @@
    <http://www.gnu.org/licenses/>.  */
 
 #include <assert.h>
+#include <string.h>
 #include "openacc.h"
 #include "libgomp.h"
 #include "oacc-int.h"
 
+static struct goacc_thread *
+get_goacc_thread (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr;
+}
+
+static struct gomp_device_descr *
+get_goacc_thread_device (void)
+{
+  struct goacc_thread *thr = goacc_thread ();
+
+  if (!thr || !thr->dev)
+    gomp_fatal ("no device active");
+
+  return thr->dev;
+}
+
+attribute_hidden struct goacc_asyncqueue *
+lookup_goacc_asyncqueue (struct goacc_thread *thr, bool create, int async)
+{
+  /* The special value acc_async_noval (-1) maps to the thread-specific
+     default async stream.  */
+  if (async == acc_async_noval)
+    async = thr->default_async;
+
+  if (async == acc_async_sync)
+    return NULL;
+
+  if (async < 0)
+    gomp_fatal ("bad async %d", async);
+
+  struct gomp_device_descr *dev = thr->dev;
+
+  gomp_mutex_lock (&dev->openacc.async.lock);
+
+  if (!create
+      && (async >= dev->openacc.async.nasyncqueue
+         || !dev->openacc.async.asyncqueue[async]))
+    {
+      gomp_mutex_unlock (&dev->openacc.async.lock);
+      return NULL;
+    }
+
+  if (async >= dev->openacc.async.nasyncqueue)
+    {
+      int diff = async + 1 - dev->openacc.async.nasyncqueue;
+      dev->openacc.async.asyncqueue
+       = gomp_realloc (dev->openacc.async.asyncqueue,
+                       sizeof (goacc_aq) * (async + 1));
+      memset (dev->openacc.async.asyncqueue + dev->openacc.async.nasyncqueue,
+             0, sizeof (goacc_aq) * diff);
+      dev->openacc.async.nasyncqueue = async + 1;
+    }
+
+  if (!dev->openacc.async.asyncqueue[async])
+    {
+      dev->openacc.async.asyncqueue[async] = dev->openacc.async.construct_func 
();
+
+      if (!dev->openacc.async.asyncqueue[async])
+       {
+         gomp_mutex_unlock (&dev->openacc.async.lock);
+         gomp_fatal ("async %d creation failed", async);
+       }
+      
+      /* Link new async queue into active list.  */
+      goacc_aq_list n = gomp_malloc (sizeof (struct goacc_asyncqueue_list));
+      n->aq = dev->openacc.async.asyncqueue[async];
+      n->next = dev->openacc.async.active;
+      dev->openacc.async.active = n;
+    }
+  gomp_mutex_unlock (&dev->openacc.async.lock);
+  return dev->openacc.async.asyncqueue[async];
+}
+
+attribute_hidden struct goacc_asyncqueue *
+get_goacc_asyncqueue (int async)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return lookup_goacc_asyncqueue (thr, true, async);
+}
+
 int
 acc_async_test (int async)
 {
@@ -42,18 +129,25 @@ acc_async_test (int async)
   if (!thr || !thr->dev)
     gomp_fatal ("no device active");
 
-  return thr->dev->openacc.async_test_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  return thr->dev->openacc.async.test_func (aq);
 }
 
 int
 acc_async_test_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
-
-  return thr->dev->openacc.async_test_all_func ();
+  int ret = 1;
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    if (!thr->dev->openacc.async.test_func (l->aq))
+      {
+       ret = 0;
+       break;
+      }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+  return ret;
 }
 
 void
@@ -62,12 +156,10 @@ acc_wait (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
-
-  thr->dev->openacc.async_wait_func (async);
+  goacc_aq aq = lookup_goacc_asyncqueue (thr, true, async);
+  thr->dev->openacc.async.synchronize_func (aq);
 }
 
 /* acc_async_wait is an OpenACC 1.0 compatibility name for acc_wait.  */
@@ -84,23 +176,28 @@ acc_async_wait (int async)
 void
 acc_wait_async (int async1, int async2)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq aq2 = lookup_goacc_asyncqueue (thr, true, async2);
+  goacc_aq aq1 = lookup_goacc_asyncqueue (thr, false, async1);
+  if (!aq1)
+    gomp_fatal ("invalid async 1");
+  if (aq1 == aq2)
+    gomp_fatal ("identical parameters");
 
-  thr->dev->openacc.async_wait_async_func (async1, async2);
+  thr->dev->openacc.async.synchronize_func (aq1);
+  thr->dev->openacc.async.serialize_func (aq1, aq2);
 }
 
 void
 acc_wait_all (void)
 {
-  struct goacc_thread *thr = goacc_thread ();
+  struct gomp_device_descr *dev = get_goacc_thread_device ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
-
-  thr->dev->openacc.async_wait_all_func ();
+  gomp_mutex_lock (&dev->openacc.async.lock);
+  for (goacc_aq_list l = dev->openacc.async.active; l; l = l->next)
+    dev->openacc.async.synchronize_func (l->aq);
+  gomp_mutex_unlock (&dev->openacc.async.lock);
 }
 
 /* acc_async_wait_all is an OpenACC 1.0 compatibility name for acc_wait_all.  
*/
@@ -120,10 +217,74 @@ acc_wait_all_async (int async)
   if (!async_valid_p (async))
     gomp_fatal ("invalid async argument: %d", async);
 
-  struct goacc_thread *thr = goacc_thread ();
+  struct goacc_thread *thr = get_goacc_thread ();
 
-  if (!thr || !thr->dev)
-    gomp_fatal ("no device active");
+  goacc_aq waiting_queue = lookup_goacc_asyncqueue (thr, true, async);
 
-  thr->dev->openacc.async_wait_all_async_func (async);
+  gomp_mutex_lock (&thr->dev->openacc.async.lock);
+  for (goacc_aq_list l = thr->dev->openacc.async.active; l; l = l->next)
+    {
+      thr->dev->openacc.async.synchronize_func (l->aq);
+      if (waiting_queue)
+       thr->dev->openacc.async.serialize_func (l->aq, waiting_queue);
+    }
+  gomp_mutex_unlock (&thr->dev->openacc.async.lock);
 }
+
+int
+acc_get_default_async (void)
+{
+  struct goacc_thread *thr = get_goacc_thread ();
+  return thr->default_async;
+}
+
+void
+acc_set_default_async (int async)
+{
+  if (async < acc_async_sync)
+    gomp_fatal ("invalid async argument: %d", async);
+
+  struct goacc_thread *thr = get_goacc_thread ();
+  thr->default_async = async;
+}
+
+attribute_hidden void
+goacc_async_free (struct gomp_device_descr *devicep,
+                 struct goacc_asyncqueue *aq, void *ptr)
+{
+  if (!aq)
+    free (ptr);
+  else
+    devicep->openacc.async.queue_callback_func (aq, free, ptr);
+}
+
+attribute_hidden void
+goacc_init_asyncqueues (struct gomp_device_descr *devicep)
+{
+  gomp_mutex_init (&devicep->openacc.async.lock);
+  devicep->openacc.async.nasyncqueue = 0;
+  devicep->openacc.async.asyncqueue = NULL;
+  devicep->openacc.async.active = NULL;
+}
+
+attribute_hidden bool
+goacc_fini_asyncqueues (struct gomp_device_descr *devicep)
+{
+  bool ret = true;
+  if (devicep->openacc.async.nasyncqueue > 0)
+    {
+      goacc_aq_list next;
+      for (goacc_aq_list l = devicep->openacc.async.active; l; l = next)
+       {
+         ret &= devicep->openacc.async.destruct_func (l->aq);
+         next = l->next;
+         free (l);
+       }
+      free (devicep->openacc.async.asyncqueue);
+      devicep->openacc.async.nasyncqueue = 0;
+      devicep->openacc.async.asyncqueue = NULL;
+      devicep->openacc.async.active = NULL;
+    }
+  gomp_mutex_destroy (&devicep->openacc.async.lock);
+  return ret;
+}
Index: libgomp/oacc-cuda.c
===================================================================
--- libgomp/oacc-cuda.c (revision 267226)
+++ libgomp/oacc-cuda.c (working copy)
@@ -62,7 +62,11 @@ acc_get_cuda_stream (int async)
     return NULL;
 
   if (thr && thr->dev && thr->dev->openacc.cuda.get_stream_func)
-    return thr->dev->openacc.cuda.get_stream_func (async);
+    {
+      goacc_aq aq = lookup_goacc_asyncqueue (thr, false, async);
+      if (aq)
+       return thr->dev->openacc.cuda.get_stream_func (aq);
+    }
  
   return NULL;
 }
@@ -79,8 +83,14 @@ acc_set_cuda_stream (int async, void *stream)
 
   thr = goacc_thread ();
 
+  int ret = -1;
   if (thr && thr->dev && thr->dev->openacc.cuda.set_stream_func)
-    return thr->dev->openacc.cuda.set_stream_func (async, stream);
+    {
+      goacc_aq aq = get_goacc_asyncqueue (async);
+      gomp_mutex_lock (&thr->dev->openacc.async.lock);
+      ret = thr->dev->openacc.cuda.set_stream_func (aq, stream);
+      gomp_mutex_unlock (&thr->dev->openacc.async.lock);
+    }
 
-  return -1;
+  return ret;
 }
Index: libgomp/oacc-host.c
===================================================================
--- libgomp/oacc-host.c (revision 267226)
+++ libgomp/oacc-host.c (working copy)
@@ -140,8 +140,7 @@ host_openacc_exec (void (*fn) (void *),
                   size_t mapnum __attribute__ ((unused)),
                   void **hostaddrs,
                   void **devaddrs __attribute__ ((unused)),
-                  int async __attribute__ ((unused)),
-                  unsigned *dims __attribute ((unused)),
+                  unsigned *dims __attribute__ ((unused)),
                   void *targ_mem_desc __attribute__ ((unused)))
 {
   fn (hostaddrs);
@@ -148,49 +147,81 @@ host_openacc_exec (void (*fn) (void *),
 }
 
 static void
-host_openacc_register_async_cleanup (void *targ_mem_desc __attribute__ 
((unused)),
-                                    int async __attribute__ ((unused)))
+host_openacc_async_exec (void (*fn) (void *),
+                        size_t mapnum __attribute__ ((unused)),
+                        void **hostaddrs,
+                        void **devaddrs __attribute__ ((unused)),
+                        unsigned *dims __attribute__ ((unused)),
+                        void *targ_mem_desc __attribute__ ((unused)),
+                        struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
+  fn (hostaddrs);
 }
 
 static int
-host_openacc_async_test (int async __attribute__ ((unused)))
+host_openacc_async_test (struct goacc_asyncqueue *aq __attribute__ ((unused)))
 {
   return 1;
 }
 
-static int
-host_openacc_async_test_all (void)
+static void
+host_openacc_async_synchronize (struct goacc_asyncqueue *aq
+                               __attribute__ ((unused)))
 {
-  return 1;
 }
 
 static void
-host_openacc_async_wait (int async __attribute__ ((unused)))
+host_openacc_async_serialize (struct goacc_asyncqueue *aq1
+                             __attribute__ ((unused)),
+                             struct goacc_asyncqueue *aq2
+                             __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_wait_async (int async1 __attribute__ ((unused)),
-                              int async2 __attribute__ ((unused)))
+static bool
+host_openacc_async_host2dev (int ord __attribute__ ((unused)),
+                            void *dst __attribute__ ((unused)),
+                            const void *src __attribute__ ((unused)),
+                            size_t n __attribute__ ((unused)),
+                            struct goacc_asyncqueue *aq
+                            __attribute__ ((unused)))
 {
+  return true;
 }
 
-static void
-host_openacc_async_wait_all (void)
+static bool
+host_openacc_async_dev2host (int ord __attribute__ ((unused)),
+                            void *dst __attribute__ ((unused)),
+                            const void *src __attribute__ ((unused)),
+                            size_t n __attribute__ ((unused)),
+                            struct goacc_asyncqueue *aq
+                            __attribute__ ((unused)))
 {
+  return true;
 }
 
 static void
-host_openacc_async_wait_all_async (int async __attribute__ ((unused)))
+host_openacc_async_queue_callback (struct goacc_asyncqueue *aq
+                                  __attribute__ ((unused)),
+                                  void (*callback_fn)(void *)
+                                  __attribute__ ((unused)),
+                                  void *userptr __attribute__ ((unused)))
 {
 }
 
-static void
-host_openacc_async_set_async (int async __attribute__ ((unused)))
+static struct goacc_asyncqueue *
+host_openacc_async_construct (void)
 {
+  return NULL;
 }
 
+static bool
+host_openacc_async_destruct (struct goacc_asyncqueue *aq
+                            __attribute__ ((unused)))
+{
+  return true;
+}
+
 static void *
 host_openacc_create_thread_data (int ord __attribute__ ((unused)))
 {
@@ -235,19 +266,21 @@ static struct gomp_device_descr host_dispatch =
 
       .exec_func = host_openacc_exec,
 
-      .register_async_cleanup_func = host_openacc_register_async_cleanup,
-
-      .async_test_func = host_openacc_async_test,
-      .async_test_all_func = host_openacc_async_test_all,
-      .async_wait_func = host_openacc_async_wait,
-      .async_wait_async_func = host_openacc_async_wait_async,
-      .async_wait_all_func = host_openacc_async_wait_all,
-      .async_wait_all_async_func = host_openacc_async_wait_all_async,
-      .async_set_async_func = host_openacc_async_set_async,
-
       .create_thread_data_func = host_openacc_create_thread_data,
       .destroy_thread_data_func = host_openacc_destroy_thread_data,
 
+      .async = {
+       .construct_func = host_openacc_async_construct,
+       .destruct_func = host_openacc_async_destruct,
+       .test_func = host_openacc_async_test,
+       .synchronize_func = host_openacc_async_synchronize,
+       .serialize_func = host_openacc_async_serialize,
+       .queue_callback_func = host_openacc_async_queue_callback,
+       .exec_func = host_openacc_async_exec,
+       .dev2host_func = host_openacc_async_dev2host,
+       .host2dev_func = host_openacc_async_host2dev,
+      },
+
       .cuda = {
        .get_current_device_func = NULL,
        .get_current_context_func = NULL,
Index: libgomp/oacc-init.c
===================================================================
--- libgomp/oacc-init.c (revision 267226)
+++ libgomp/oacc-init.c (working copy)
@@ -309,7 +309,7 @@ acc_shutdown_1 (acc_device_t d)
       if (acc_dev->state == GOMP_DEVICE_INITIALIZED)
         {
          devices_active = true;
-         ret &= acc_dev->fini_device_func (acc_dev->target_id);
+         ret &= gomp_fini_device (acc_dev);
          acc_dev->state = GOMP_DEVICE_UNINITIALIZED;
        }
       gomp_mutex_unlock (&acc_dev->lock);
@@ -426,8 +426,8 @@ goacc_attach_host_thread_to_device (int ord)
   
   thr->target_tls
     = acc_dev->openacc.create_thread_data_func (ord);
-  
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
+
+  thr->default_async = acc_async_default;
 }
 
 /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of
Index: libgomp/oacc-int.h
===================================================================
--- libgomp/oacc-int.h  (revision 267226)
+++ libgomp/oacc-int.h  (working copy)
@@ -73,6 +73,9 @@ struct goacc_thread
 
   /* Target-specific data (used by plugin).  */
   void *target_tls;
+
+  /* Default OpenACC async queue for current thread, exported to plugin.  */
+  int default_async;
 };
 
 #if defined HAVE_TLS || defined USE_EMUTLS
@@ -99,6 +102,14 @@ void goacc_restore_bind (void);
 void goacc_lazy_initialize (void);
 void goacc_host_init (void);
 
+void goacc_init_asyncqueues (struct gomp_device_descr *);
+bool goacc_fini_asyncqueues (struct gomp_device_descr *);
+void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *,
+                      void *);
+struct goacc_asyncqueue *get_goacc_asyncqueue (int);
+struct goacc_asyncqueue *lookup_goacc_asyncqueue (struct goacc_thread *, bool,
+                                                 int);
+
 static inline bool
 async_valid_stream_id_p (int async)
 {
Index: libgomp/oacc-mem.c
===================================================================
--- libgomp/oacc-mem.c  (revision 267226)
+++ libgomp/oacc-mem.c  (working copy)
@@ -172,18 +172,11 @@ memcpy_tofrom_device (bool from, void *d, void *h,
       return;
     }
 
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (async);
-
-  bool ret = (from
-             ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s)
-             : thr->dev->host2dev_func (thr->dev->target_id, d, h, s));
-
-  if (async > acc_async_sync)
-    thr->dev->openacc.async_set_async_func (acc_async_sync);
-
-  if (!ret)
-    gomp_fatal ("error in %s", libfnname);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  if (from)
+    gomp_copy_dev2host (thr->dev, aq, h, d, s);
+  else
+    gomp_copy_host2dev (thr->dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
 }
 
 void
@@ -509,17 +502,13 @@ present_create_copy (unsigned f, void *h, size_t s
 
       gomp_mutex_unlock (&acc_dev->lock);
 
-      if (async > acc_async_sync)
-       acc_dev->openacc.async_set_async_func (async);
+      goacc_aq aq = get_goacc_asyncqueue (async);
 
-      tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true,
-                          GOMP_MAP_VARS_OPENACC);
+      tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s,
+                                &kinds, true, GOMP_MAP_VARS_OPENACC);
       /* Initialize dynamic refcount.  */
       tgt->list[0].key->dynamic_refcount = 1;
 
-      if (async > acc_async_sync)
-       acc_dev->openacc.async_set_async_func (acc_async_sync);
-
       gomp_mutex_lock (&acc_dev->lock);
 
       d = tgt->to_free;
@@ -676,13 +665,9 @@ delete_copyout (unsigned f, void *h, size_t s, int
 
       if (f & FLAG_COPYOUT)
        {
-         if (async > acc_async_sync)
-           acc_dev->openacc.async_set_async_func (async);
-         acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
-         if (async > acc_async_sync)
-           acc_dev->openacc.async_set_async_func (acc_async_sync);
+         goacc_aq aq = get_goacc_asyncqueue (async);
+         gomp_copy_dev2host (acc_dev, aq, h, d, s);
        }
-
       gomp_remove_var (acc_dev, n);
     }
 
@@ -765,17 +750,13 @@ update_dev_host (int is_dev, void *h, size_t s, in
   d = (void *) (n->tgt->tgt_start + n->tgt_offset
                + (uintptr_t) h - n->host_start);
 
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (async);
+  goacc_aq aq = get_goacc_asyncqueue (async);
 
   if (is_dev)
-    acc_dev->host2dev_func (acc_dev->target_id, d, h, s);
+    gomp_copy_host2dev (acc_dev, aq, d, h, s, /* TODO: cbuf? */ NULL);
   else
-    acc_dev->dev2host_func (acc_dev->target_id, h, d, s);
+    gomp_copy_dev2host (acc_dev, aq, h, d, s);
 
-  if (async > acc_async_sync)
-    acc_dev->openacc.async_set_async_func (acc_async_sync);
-
   gomp_mutex_unlock (&acc_dev->lock);
 }
 
@@ -805,7 +786,7 @@ acc_update_self_async (void *h, size_t s, int asyn
 
 void
 gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes,
-                        void *kinds)
+                        void *kinds, int async)
 {
   struct target_mem_desc *tgt;
   struct goacc_thread *thr = goacc_thread ();
@@ -835,8 +816,9 @@ gomp_acc_insert_pointer (size_t mapnum, void **hos
     }
 
   gomp_debug (0, "  %s: prepare mappings\n", __FUNCTION__);
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs,
-                      NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs,
+                            NULL, sizes, kinds, true, GOMP_MAP_VARS_OPENACC);
   gomp_debug (0, "  %s: mappings prepared\n", __FUNCTION__);
 
   /* Initialize dynamic refcount.  */
@@ -930,7 +912,10 @@ gomp_acc_remove_pointer (void *h, size_t s, bool f
       if (async < acc_async_noval)
        gomp_unmap_vars (t, true);
       else
-       t->device_descr->openacc.register_async_cleanup_func (t, async);
+       {
+         goacc_aq aq = get_goacc_asyncqueue (async);
+         gomp_unmap_vars_async (t, true, aq);
+       }
     }
 
   gomp_mutex_unlock (&acc_dev->lock);
Index: libgomp/oacc-parallel.c
===================================================================
--- libgomp/oacc-parallel.c     (revision 267226)
+++ libgomp/oacc-parallel.c     (working copy)
@@ -208,8 +208,6 @@ GOACC_parallel_keyed (int device, void (*fn) (void
     }
   va_end (ap);
   
-  acc_dev->openacc.async_set_async_func (async);
-
   if (!(acc_dev->capabilities & GOMP_OFFLOAD_CAP_NATIVE_EXEC))
     {
       k.host_start = (uintptr_t) fn;
@@ -226,44 +224,29 @@ GOACC_parallel_keyed (int device, void (*fn) (void
   else
     tgt_fn = (void (*)) fn;
 
-  tgt = gomp_map_vars (acc_dev, mapnum, hostaddrs, NULL, sizes, kinds, true,
-                      GOMP_MAP_VARS_OPENACC);
+  goacc_aq aq = get_goacc_asyncqueue (async);
 
+  tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, NULL, sizes, 
kinds,
+                            true, GOMP_MAP_VARS_OPENACC);
+  
   devaddrs = gomp_alloca (sizeof (void *) * mapnum);
   for (i = 0; i < mapnum; i++)
     devaddrs[i] = (void *) (tgt->list[i].key->tgt->tgt_start
                            + tgt->list[i].key->tgt_offset
                            + tgt->list[i].offset);
-
-  acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
-                             async, dims, tgt);
-
-  /* If running synchronously, unmap immediately.  */
-  bool copyfrom = true;
-  if (async_synchronous_p (async))
-    gomp_unmap_vars (tgt, true);
+  if (aq == NULL)
+    {
+      acc_dev->openacc.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+                                 dims, tgt);
+      /* If running synchronously, unmap immediately.  */
+      gomp_unmap_vars (tgt, true);
+    }
   else
     {
-      bool async_unmap = false;
-      for (size_t i = 0; i < tgt->list_count; i++)
-       {
-         splay_tree_key k = tgt->list[i].key;
-         if (k && k->refcount == 1)
-           {
-             async_unmap = true;
-             break;
-           }
-       }
-      if (async_unmap)
-       tgt->device_descr->openacc.register_async_cleanup_func (tgt, async);
-      else
-       {
-         copyfrom = false;
-         gomp_unmap_vars (tgt, copyfrom);
-       }
+      acc_dev->openacc.async.exec_func (tgt_fn, mapnum, hostaddrs, devaddrs,
+                                       dims, tgt, aq);
+      gomp_unmap_vars_async (tgt, true, aq);
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 /* Legacy entry point, only provide host execution.  */
@@ -372,8 +355,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
        finalize = true;
     }
 
-  acc_dev->openacc.async_set_async_func (async);
-
   /* Determine if this is an "acc enter data".  */
   for (i = 0; i < mapnum; ++i)
     {
@@ -441,7 +422,7 @@ GOACC_enter_exit_data (int device, size_t mapnum,
          else
            {
              gomp_acc_insert_pointer (pointer, &hostaddrs[i],
-                                      &sizes[i], &kinds[i]);
+                                      &sizes[i], &kinds[i], async);
              /* Increment 'i' by two because OpenACC requires fortran
                 arrays to be contiguous, so each PSET is associated with
                 one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and
@@ -466,17 +447,17 @@ GOACC_enter_exit_data (int device, size_t mapnum,
                if (acc_is_present (hostaddrs[i], sizes[i]))
                  {
                    if (finalize)
-                     acc_delete_finalize (hostaddrs[i], sizes[i]);
+                     acc_delete_finalize_async (hostaddrs[i], sizes[i], async);
                    else
-                     acc_delete (hostaddrs[i], sizes[i]);
+                     acc_delete_async (hostaddrs[i], sizes[i], async);
                  }
                break;
              case GOMP_MAP_FROM:
              case GOMP_MAP_FORCE_FROM:
                if (finalize)
-                 acc_copyout_finalize (hostaddrs[i], sizes[i]);
+                 acc_copyout_finalize_async (hostaddrs[i], sizes[i], async);
                else
-                 acc_copyout (hostaddrs[i], sizes[i]);
+                 acc_copyout_async (hostaddrs[i], sizes[i], async);
                break;
              default:
                gomp_fatal (">>>> GOACC_enter_exit_data UNHANDLED kind 0x%.2x",
@@ -494,8 +475,6 @@ GOACC_enter_exit_data (int device, size_t mapnum,
            i += pointer - 1;
          }
       }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 static void
@@ -508,17 +487,22 @@ goacc_wait (int async, int num_waits, va_list *ap)
     {
       int qid = va_arg (*ap, int);
       
-      if (acc_async_test (qid))
+      goacc_aq aq = get_goacc_asyncqueue (qid);
+      if (acc_dev->openacc.async.test_func (aq))
        continue;
-
       if (async == acc_async_sync)
-       acc_wait (qid);
+       acc_dev->openacc.async.synchronize_func (aq);
       else if (qid == async)
-       ;/* If we're waiting on the same asynchronous queue as we're
-           launching on, the queue itself will order work as
-           required, so there's no need to wait explicitly.  */
+       /* If we're waiting on the same asynchronous queue as we're
+          launching on, the queue itself will order work as
+          required, so there's no need to wait explicitly.  */
+       ;
       else
-       acc_dev->openacc.async_wait_async_func (qid, async);
+       {
+         goacc_aq aq2 = get_goacc_asyncqueue (async);
+         acc_dev->openacc.async.synchronize_func (aq);
+         acc_dev->openacc.async.serialize_func (aq, aq2);
+       }
     }
 }
 
@@ -548,8 +532,6 @@ GOACC_update (int device, size_t mapnum,
       va_end (ap);
     }
 
-  acc_dev->openacc.async_set_async_func (async);
-
   bool update_device = false;
   for (i = 0; i < mapnum; ++i)
     {
@@ -589,7 +571,7 @@ GOACC_update (int device, size_t mapnum,
          /* Fallthru  */
        case GOMP_MAP_FORCE_TO:
          update_device = true;
-         acc_update_device (hostaddrs[i], sizes[i]);
+         acc_update_device_async (hostaddrs[i], sizes[i], async);
          break;
 
        case GOMP_MAP_FROM:
@@ -601,7 +583,7 @@ GOACC_update (int device, size_t mapnum,
          /* Fallthru  */
        case GOMP_MAP_FORCE_FROM:
          update_device = false;
-         acc_update_self (hostaddrs[i], sizes[i]);
+         acc_update_self_async (hostaddrs[i], sizes[i], async);
          break;
 
        default:
@@ -609,8 +591,6 @@ GOACC_update (int device, size_t mapnum,
          break;
        }
     }
-
-  acc_dev->openacc.async_set_async_func (acc_async_sync);
 }
 
 void
Index: libgomp/oacc-plugin.c
===================================================================
--- libgomp/oacc-plugin.c       (revision 267226)
+++ libgomp/oacc-plugin.c       (working copy)
@@ -31,14 +31,10 @@
 #include "oacc-int.h"
 
 void
-GOMP_PLUGIN_async_unmap_vars (void *ptr, int async)
+GOMP_PLUGIN_async_unmap_vars (void *ptr __attribute__((unused)),
+                             int async __attribute__((unused)))
 {
-  struct target_mem_desc *tgt = ptr;
-  struct gomp_device_descr *devicep = tgt->device_descr;
-
-  devicep->openacc.async_set_async_func (async);
-  gomp_unmap_vars (tgt, true);
-  devicep->openacc.async_set_async_func (acc_async_sync);
+  gomp_fatal ("invalid plugin function");
 }
 
 /* Return the target-specific part of the TLS data for the current thread.  */

Reply via email to