Hi!

On Thu, 12 Dec 2013 12:31:40 +0100, I wrote:
> On Fri, 8 Nov 2013 16:40:00 +0100, Jakub Jelinek <ja...@redhat.com> wrote:
> > [...], device 257 is just a temporary testing hack, [...]
> 
> > [...], once we have at least one supported offloading target,
> > hopefully we'll nuke device 257.
> 
> Hmm, in contrast, I'd advocate to preserve that device, under a proper
> ID, for two (similar) reasons: even if it's the same architecture, we'll
> still want a generic non-shared-memory "offloading target" for GCC
> testsuite usage.  We can't assume that any of the "real hardware"
> acceleration devices will be available, but will still want to test the
> non-shared-memory stuff.  And likewise, GCC users can use this for
> testing their code for shared-memory host (fallback) execution vs.
> non-shared-memory execution.  So basically, just like a user can decide
> to use OpenMP/libgomp, but tie the runtime down to just one thread; but
> that's still different from host fallback execution.  Makes sense?

Here is such a libgomp plugin plus the infrastructure for initial support
of non-shared memory host execution.  Any comments?

I have not yet integrated the plugin into the libgomp build system; use
something like:

    $ gcc -m64 -Wall -Wextra -shared -o libgomp-plugin-host.so.1 \
        [...]/libgomp/plugin-host.c -fPIC -O -DDEBUG

..., and then set LIBGOMP_PLUGIN_PATH=$PWD in the environment.  (Plus
OMP_DEFAULT_DEVICE=0, but that's the default.)

commit 8495aab54fb244ef2643e43eb3e91a092ff0b14e
Author: Thomas Schwinge <tho...@codesourcery.com>, James Norris <jnor...@codesourcery.com>
Date:   Wed Feb 19 16:53:14 2014 +0100

    libgomp: plugin for non-shared memory host execution.
    
        libgomp/
        * plugin-host.c: New file.
        * target.c (struct gomp_device_descr): Add device_alloc_func,
        device_free_func, device_dev2host_func, device_host2dev_func
        members.
        (gomp_load_plugin_for_device): Load these.
        (gomp_map_vars, gomp_unmap_tgt, gomp_unmap_vars, gomp_update): Use
        these.
        (resolve_device, gomp_find_available_plugins): Remove ID 257 hack.

diff --git libgomp/plugin-host.c libgomp/plugin-host.c
new file mode 100644
index 0000000..5354ebe
--- /dev/null
+++ libgomp/plugin-host.c
@@ -0,0 +1,102 @@
+/* Plugin for non-shared memory host execution.
+
+   Copyright (C) 2014 Free Software Foundation, Inc.
+
+   Contributed by Thomas Schwinge <tho...@codesourcery.com>.
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+/* Simple implementation of a libgomp plugin for non-shared memory host
+   execution.  */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Report whether the device is usable.  This plugin always returns true.  */
+
+bool
+device_available (void)
+{
+#ifdef DEBUG
+  printf ("libgomp plugin: %s:%s\n", __FILE__, __FUNCTION__);
+#endif
+
+  return true;
+}
+
+/* Allocate SIZE bytes of "device" memory with malloc and return the
+   resulting pointer, which is NULL if malloc fails.  */
+
+void *
+device_alloc (size_t size)
+{
+  void *ptr = malloc (size);
+
+#ifdef DEBUG
+  printf ("libgomp plugin: %s:%s (%zu): %p\n", __FILE__, __FUNCTION__, size,
+          ptr);
+#endif
+
+  return ptr;
+}
+
+/* Release the block at PTR by handing it to free.  */
+
+void
+device_free (void *ptr)
+{
+#ifdef DEBUG
+  printf ("libgomp plugin: %s:%s (%p)\n", __FILE__, __FUNCTION__, ptr);
+#endif
+
+  free (ptr);
+}
+
+/* Copy N bytes from "device" memory at SRC to host memory at DEST with
+   memcpy, and return DEST.  The two regions must not overlap.  */
+
+void *
+device_dev2host (void *dest, const void *src, size_t n)
+{
+#ifdef DEBUG
+  printf ("libgomp plugin: %s:%s (%p, %p, %zu)\n", __FILE__, __FUNCTION__,
+          dest, src, n);
+#endif
+
+  return memcpy (dest, src, n);
+}
+
+/* Copy N bytes from host memory at SRC to "device" memory at DEST with
+   memcpy, and return DEST.  The two regions must not overlap.  */
+
+void *
+device_host2dev (void *dest, const void *src, size_t n)
+{
+#ifdef DEBUG
+  printf ("libgomp plugin: %s:%s (%p, %p, %zu)\n", __FILE__, __FUNCTION__,
+          dest, src, n);
+#endif
+
+  return memcpy (dest, src, n);
+}
diff --git libgomp/target.c libgomp/target.c
index 55d3891..48f35c4 100644
--- libgomp/target.c
+++ libgomp/target.c
@@ -122,6 +122,10 @@ struct gomp_device_descr
 
   /* Function handlers.  */
   bool (*device_available_func) (void);
+  void *(*device_alloc_func) (size_t);
+  void (*device_free_func) (void *);
+  void *(*device_dev2host_func)(void *, const void *, size_t);
+  void *(*device_host2dev_func)(void *, const void *, size_t);
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s dev_splay_tree;
@@ -146,14 +150,9 @@ resolve_device (int device_id)
       device_id = icv->default_device_var;
     }
   if (device_id < 0
-      || (device_id >= gomp_get_num_devices ()
-         && device_id != 257))
+      || (device_id >= gomp_get_num_devices ()))
     return NULL;
 
-  /* FIXME: Temporary hack for testing non-shared address spaces on host.  */
-  if (device_id == 257)
-    return &devices[0];
-
   return &devices[device_id];
 }
 
@@ -233,10 +232,10 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 
   if (not_found_cnt || is_target)
     {
-      /* FIXME: This would be accelerator memory allocation, not
-        host, and should allocate tgt_align aligned tgt_size block
-        of memory.  */
-      tgt->to_free = gomp_malloc (tgt_size + tgt_align - 1);
+      /* Allocate tgt_align aligned tgt_size block of memory.  */
+      /* FIXME: Perhaps change interface to allocate properly aligned
+        memory.  */
+      tgt->to_free = devicep->device_alloc_func (tgt_size + tgt_align - 1);
       tgt->tgt_start = (uintptr_t) tgt->to_free;
       tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1);
       tgt->tgt_end = tgt->tgt_start + tgt_size;
@@ -297,13 +296,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                    break;
                  case 1: /* TO */
                  case 3: /* TOFROM */
-                   /* FIXME: This is supposed to be copy from host to device
-                      memory.  Perhaps add some smarts, like if copying
+                   /* Copy from host to device memory.  */
+                   /* FIXME: Perhaps add some smarts, like if copying
                       several adjacent fields from host to target, use some
                       host buffer to avoid sending each var individually.  */
-                   memcpy ((void *) (tgt->tgt_start + k->tgt_offset),
-                           (void *) k->host_start,
-                           k->host_end - k->host_start);
+                   devicep->device_host2dev_func((void *) (tgt->tgt_start
+                                                           + k->tgt_offset),
+                                                 (void *) k->host_start,
+                                                 k->host_end - k->host_start);
                    break;
                  case 4: /* POINTER */
                    cur_node.host_start
@@ -337,10 +337,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
                       array section.  Now subtract bias to get what we want
                       to initialize the pointer with.  */
                    cur_node.tgt_offset -= sizes[i];
-                   /* FIXME: host to device copy, see above FIXME comment.  */
-                   memcpy ((void *) (tgt->tgt_start + k->tgt_offset),
-                           (void *) &cur_node.tgt_offset,
-                           sizeof (void *));
+                   /* Copy from host to device memory.  */
+                   /* FIXME: see above FIXME comment.  */
+                   devicep->device_host2dev_func ((void *) (tgt->tgt_start
+                                                            + k->tgt_offset),
+                                                  (void *) &cur_node.tgt_offset,
+                                                  sizeof (void *));
                    break;
                  }
                array++;
@@ -353,10 +355,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
        {
          cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start
                                + tgt->list[i]->tgt_offset;
-         /* FIXME: host to device copy, see above FIXME comment.  */
-         memcpy ((void *) (tgt->tgt_start + i * sizeof (void *)),
-                 (void *) &cur_node.tgt_offset,
-                 sizeof (void *));
+         /* Copy from host to device memory.  */
+         /* FIXME: see above FIXME comment.  */
+         devicep->device_host2dev_func ((void *) (tgt->tgt_start
+                                                  + i * sizeof (void *)),
+                                        (void *) &cur_node.tgt_offset,
+                                        sizeof (void *));
        }
     }
 
@@ -367,10 +371,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
 static void
 gomp_unmap_tgt (struct target_mem_desc *tgt)
 {
-  /* FIXME: Deallocate on target the tgt->tgt_start .. tgt->tgt_end
-     region.  */
+  /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region.  */
   if (tgt->tgt_end)
-    free (tgt->to_free);
+    tgt->device_descr->device_free_func(tgt->to_free);
 
   free (tgt->array);
   free (tgt);
@@ -396,10 +399,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt)
       {
        splay_tree_key k = tgt->list[i];
        if (k->copy_from)
-         /* FIXME: device to host copy.  */
-         memcpy ((void *) k->host_start,
-                 (void *) (k->tgt->tgt_start + k->tgt_offset),
-                 k->host_end - k->host_start);
+         /* Copy from device to host memory.  */
+         devicep->device_dev2host_func ((void *) k->host_start,
+                                        (void *) (k->tgt->tgt_start
+                                                  + k->tgt_offset),
+                                        k->host_end - k->host_start);
        splay_tree_remove (&devicep->dev_splay_tree, k);
        if (k->tgt->refcount > 1)
          k->tgt->refcount--;
@@ -446,17 +450,23 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
                          (void *) n->host_start,
                          (void *) n->host_end);
            if ((kinds[i] & 7) == 1)
-             /* FIXME: host to device copy.  */
-             memcpy ((void *) (n->tgt->tgt_start + n->tgt_offset
-                               + cur_node.host_start - n->host_start),
-                     (void *) cur_node.host_start,
-                     cur_node.host_end - cur_node.host_start);
+             /* Copy from host to device memory.  */
+             devicep->device_host2dev_func ((void *) (n->tgt->tgt_start
+                                                      + n->tgt_offset
+                                                      + cur_node.host_start
+                                                      - n->host_start),
+                                            (void *) cur_node.host_start,
+                                            cur_node.host_end
+                                            - cur_node.host_start);
            else if ((kinds[i] & 7) == 2)
-             /* FIXME: device to host copy.  */
-             memcpy ((void *) cur_node.host_start,
-                     (void *) (n->tgt->tgt_start + n->tgt_offset
-                               + cur_node.host_start - n->host_start),
-                     cur_node.host_end - cur_node.host_start);
+             /* Copy from device to host memory.  */
+             devicep->device_dev2host_func ((void *) cur_node.host_start,
+                                            (void *) (n->tgt->tgt_start
+                                                      + n->tgt_offset
+                                                      + cur_node.host_start
+                                                      - n->host_start),
+                                            cur_node.host_end
+                                            - cur_node.host_start);
          }
        else
          gomp_fatal ("Trying to update [%p..%p) object that is not mapped",
@@ -608,28 +618,43 @@ static bool
 gomp_load_plugin_for_device (struct gomp_device_descr *device,
                             const char *plugin_name)
 {
-  if (!device || !plugin_name)
-    return false;
-
-  device->plugin_handle = dlopen (plugin_name, RTLD_LAZY);
-  if (!device->plugin_handle)
-    return false;
+  char *err = NULL;
 
   /* Clear any existing error.  */
   dlerror ();
 
+  device->plugin_handle = dlopen (plugin_name, RTLD_LAZY);
+  if (!device->plugin_handle)
+    {
+      err = dlerror ();
+      goto out;
+    }
+
   /* Check if all required functions are available in the plugin and store
-     their handlers.
-     TODO: check for other routines as well.  */
-  device->device_available_func = dlsym (device->plugin_handle,
-                                        "device_available");
-  if (dlerror () != NULL)
+     their handlers.  */
+#define DLSYM(f) \
+  do                                                                   \
+    {                                                                  \
+      device->f##_func = dlsym (device->plugin_handle, #f);            \
+      err = dlerror ();                                                \
+      if (err != NULL)                                                 \
+       goto out;                                                       \
+    }                                                                  \
+  while (0)
+  DLSYM (device_available);
+  DLSYM (device_alloc);
+  DLSYM (device_free);
+  DLSYM (device_dev2host);
+  DLSYM (device_host2dev);
+#undef DLSYM
+
+ out:
+  if (err != NULL)
     {
+      gomp_error ("while loading %s: %s", plugin_name, err);
       dlclose (device->plugin_handle);
-      return false;
     }
-
-  return true;
+  return err == NULL;
 }
 
 /* This functions scans folder, specified in environment variable
@@ -674,7 +699,6 @@ gomp_find_available_plugins (void)
       if (devices == NULL)
        {
          num_devices = 0;
-         closedir (dir);
          goto out;
        }
 
@@ -684,26 +708,10 @@ gomp_find_available_plugins (void)
       gomp_mutex_init (&devices[num_devices].dev_env_lock);
       num_devices++;
     }
-  closedir (dir);
 
  out:
-  /* FIXME: Temporary hack for testing non-shared address spaces on host.
-     We create device 257 just to check memory mapping.  */
-  if (num_devices == 0)
-    {
-      num_devices = 1;
-      devices = malloc (sizeof (struct gomp_device_descr));
-      if (devices == NULL)
-       {
-         num_devices = 0;
-         return;
-       }
-      devices[0].plugin_handle = NULL;
-      devices[0].device_available_func = NULL;
-      devices[0].dev_splay_tree.root = NULL;
-      gomp_mutex_init (&devices[0].dev_env_lock);
-    }
-  devices[0].id = 257;
+  if (dir)
+    closedir (dir);
 }
 
 /* This function initializes runtime needed for offloading.


Grüße,
 Thomas

Attachment: pgpEOibMs9aIU.pgp
Description: PGP signature

Reply via email to