Hi! On Thu, 12 Dec 2013 12:31:40 +0100, I wrote: > On Fri, 8 Nov 2013 16:40:00 +0100, Jakub Jelinek <ja...@redhat.com> wrote: > > [...], device 257 is just a temporary testing hack, [...] > > > [...], once we have at least one supported offloading target, > > hopefully we'll nuke device 257. > > Hmm, in contrast, I'd advocate preserving that device, under a proper > ID, for two (similar) reasons: even if it's the same architecture, we'll > still want a generic non-shared-memory "offloading target" for GCC > testsuite usage. We can't assume that any of the "real hardware" > acceleration devices are available, but will still want to test the > non-shared-memory stuff. And likewise, GCC users can use this for > testing their code for shared-memory host (fallback) execution vs. > non-shared-memory execution. So basically, just like a user can decide > to use OpenMP/libgomp, but tie the runtime down to just one thread; but > that's still different from host fallback execution. Makes sense?
Here is such a libgomp plugin plus the infrastructure for initial support of non-shared memory host execution. Any comments? I have not yet integrated the plugin into the libgomp build system; use something like: $ gcc -m64 -Wall -Wextra -shared -o libgomp-plugin-host.so.1 [...]/libgomp/plugin-host.c -fPIC -O -DDEBUG ..., and then set LIBGOMP_PLUGIN_PATH=$PWD in the environment. (Plus OMP_DEFAULT_DEVICE=0, but that's the default.) commit 8495aab54fb244ef2643e43eb3e91a092ff0b14e Author: Thomas Schwinge <tho...@codesourcery.com>, James Norris <jnor...@codesourcery.com> Date: Wed Feb 19 16:53:14 2014 +0100 libgomp: plugin for non-shared memory host execution. libgomp/ * plugin-host.c: New file. * target.c (struct gomp_device_descr): Add device_alloc_func, device_free_func, device_dev2host_func, device_host2dev_func members. (gomp_load_plugin_for_device): Load these. (gomp_map_vars, gomp_unmap_tgt, gomp_unmap_vars, gomp_update): Use these. (resolve_device, gomp_find_available_plugins): Remove ID 257 hack. diff --git libgomp/plugin-host.c libgomp/plugin-host.c new file mode 100644 index 0000000..5354ebe --- /dev/null +++ libgomp/plugin-host.c @@ -0,0 +1,84 @@ +/* Plugin for non-shared memory host execution. + + Copyright (C) 2014 Free Software Foundation, Inc. + + Contributed by Thomas Schwinge <tho...@codesourcery.com>. + + This file is part of the GNU OpenMP Library (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Simple implementation of a libgomp plugin for non-shared memory host + execution. */ + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +bool +device_available (void) +{ +#ifdef DEBUG + printf ("libgomp plugin: %s:%s\n", __FILE__, __FUNCTION__); +#endif + + return true; +} + +void * +device_alloc (size_t size) +{ + void *ptr = malloc (size); + +#ifdef DEBUG + printf ("libgomp plugin: %s:%s (%zd): %p\n", __FILE__, __FUNCTION__, size, ptr); +#endif + + return ptr; +} + +void +device_free (void *ptr) +{ +#ifdef DEBUG + printf ("libgomp plugin: %s:%s (%p)\n", __FILE__, __FUNCTION__, ptr); +#endif + + free (ptr); +} + +void *device_dev2host (void *dest, const void *src, size_t n) +{ +#ifdef DEBUG + printf ("libgomp plugin: %s:%s (%p, %p, %zd)\n", __FILE__, __FUNCTION__, dest, src, n); +#endif + + return memcpy (dest, src, n); +} + +void *device_host2dev (void *dest, const void *src, size_t n) +{ +#ifdef DEBUG + printf ("libgomp plugin: %s:%s (%p, %p, %zd)\n", __FILE__, __FUNCTION__, dest, src, n); +#endif + + return memcpy (dest, src, n); +} diff --git libgomp/target.c libgomp/target.c index 55d3891..48f35c4 100644 --- libgomp/target.c +++ libgomp/target.c @@ -122,6 +122,10 @@ struct gomp_device_descr /* Function handlers. 
*/ bool (*device_available_func) (void); + void *(*device_alloc_func) (size_t); + void (*device_free_func) (void *); + void *(*device_dev2host_func)(void *, const void *, size_t); + void *(*device_host2dev_func)(void *, const void *, size_t); /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s dev_splay_tree; @@ -146,14 +150,9 @@ resolve_device (int device_id) device_id = icv->default_device_var; } if (device_id < 0 - || (device_id >= gomp_get_num_devices () - && device_id != 257)) + || (device_id >= gomp_get_num_devices ())) return NULL; - /* FIXME: Temporary hack for testing non-shared address spaces on host. */ - if (device_id == 257) - return &devices[0]; - return &devices[device_id]; } @@ -233,10 +232,10 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, if (not_found_cnt || is_target) { - /* FIXME: This would be accelerator memory allocation, not - host, and should allocate tgt_align aligned tgt_size block - of memory. */ - tgt->to_free = gomp_malloc (tgt_size + tgt_align - 1); + /* Allocate tgt_align aligned tgt_size block of memory. */ + /* FIXME: Perhaps change interface to allocate properly aligned + memory. */ + tgt->to_free = devicep->device_alloc_func (tgt_size + tgt_align - 1); tgt->tgt_start = (uintptr_t) tgt->to_free; tgt->tgt_start = (tgt->tgt_start + tgt_align - 1) & ~(tgt_align - 1); tgt->tgt_end = tgt->tgt_start + tgt_size; @@ -297,13 +296,14 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, break; case 1: /* TO */ case 3: /* TOFROM */ - /* FIXME: This is supposed to be copy from host to device - memory. Perhaps add some smarts, like if copying + /* Copy from host to device memory. */ + /* FIXME: Perhaps add some smarts, like if copying several adjacent fields from host to target, use some host buffer to avoid sending each var individually. 
*/ - memcpy ((void *) (tgt->tgt_start + k->tgt_offset), - (void *) k->host_start, - k->host_end - k->host_start); + devicep->device_host2dev_func((void *) (tgt->tgt_start + + k->tgt_offset), + (void *) k->host_start, + k->host_end - k->host_start); break; case 4: /* POINTER */ cur_node.host_start @@ -337,10 +337,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, array section. Now subtract bias to get what we want to initialize the pointer with. */ cur_node.tgt_offset -= sizes[i]; - /* FIXME: host to device copy, see above FIXME comment. */ - memcpy ((void *) (tgt->tgt_start + k->tgt_offset), - (void *) &cur_node.tgt_offset, - sizeof (void *)); + /* Copy from host to device memory. */ + /* FIXME: see above FIXME comment. */ + devicep->device_host2dev_func ((void *) (tgt->tgt_start + + k->tgt_offset), + (void *) &cur_node.tgt_offset, + sizeof (void *)); break; } array++; @@ -353,10 +355,12 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, { cur_node.tgt_offset = tgt->list[i]->tgt->tgt_start + tgt->list[i]->tgt_offset; - /* FIXME: host to device copy, see above FIXME comment. */ - memcpy ((void *) (tgt->tgt_start + i * sizeof (void *)), - (void *) &cur_node.tgt_offset, - sizeof (void *)); + /* Copy from host to device memory. */ + /* FIXME: see above FIXME comment. */ + devicep->device_host2dev_func ((void *) (tgt->tgt_start + + i * sizeof (void *)), + (void *) &cur_node.tgt_offset, + sizeof (void *)); } } @@ -367,10 +371,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, static void gomp_unmap_tgt (struct target_mem_desc *tgt) { - /* FIXME: Deallocate on target the tgt->tgt_start .. tgt->tgt_end - region. */ + /* Deallocate on target the tgt->tgt_start .. tgt->tgt_end region. 
*/ if (tgt->tgt_end) - free (tgt->to_free); + tgt->device_descr->device_free_func(tgt->to_free); free (tgt->array); free (tgt); @@ -396,10 +399,11 @@ gomp_unmap_vars (struct target_mem_desc *tgt) { splay_tree_key k = tgt->list[i]; if (k->copy_from) - /* FIXME: device to host copy. */ - memcpy ((void *) k->host_start, - (void *) (k->tgt->tgt_start + k->tgt_offset), - k->host_end - k->host_start); + /* Copy from device to host memory. */ + devicep->device_dev2host_func ((void *) k->host_start, + (void *) (k->tgt->tgt_start + + k->tgt_offset), + k->host_end - k->host_start); splay_tree_remove (&devicep->dev_splay_tree, k); if (k->tgt->refcount > 1) k->tgt->refcount--; @@ -446,17 +450,23 @@ gomp_update (struct gomp_device_descr *devicep, size_t mapnum, (void *) n->host_start, (void *) n->host_end); if ((kinds[i] & 7) == 1) - /* FIXME: host to device copy. */ - memcpy ((void *) (n->tgt->tgt_start + n->tgt_offset - + cur_node.host_start - n->host_start), - (void *) cur_node.host_start, - cur_node.host_end - cur_node.host_start); + /* Copy from host to device memory. */ + devicep->device_host2dev_func ((void *) (n->tgt->tgt_start + + n->tgt_offset + + cur_node.host_start + - n->host_start), + (void *) cur_node.host_start, + cur_node.host_end + - cur_node.host_start); else if ((kinds[i] & 7) == 2) - /* FIXME: device to host copy. */ - memcpy ((void *) cur_node.host_start, - (void *) (n->tgt->tgt_start + n->tgt_offset - + cur_node.host_start - n->host_start), - cur_node.host_end - cur_node.host_start); + /* Copy from device to host memory. 
*/ + devicep->device_dev2host_func ((void *) cur_node.host_start, + (void *) (n->tgt->tgt_start + + n->tgt_offset + + cur_node.host_start + - n->host_start), + cur_node.host_end + - cur_node.host_start); } else gomp_fatal ("Trying to update [%p..%p) object that is not mapped", @@ -608,28 +618,43 @@ static bool gomp_load_plugin_for_device (struct gomp_device_descr *device, const char *plugin_name) { - if (!device || !plugin_name) - return false; - - device->plugin_handle = dlopen (plugin_name, RTLD_LAZY); - if (!device->plugin_handle) - return false; + char *err = NULL; /* Clear any existing error. */ dlerror (); + device->plugin_handle = dlopen (plugin_name, RTLD_LAZY); + if (!device->plugin_handle) + { + err = dlerror (); + goto out; + } + /* Check if all required functions are available in the plugin and store - their handlers. - TODO: check for other routines as well. */ - device->device_available_func = dlsym (device->plugin_handle, - "device_available"); - if (dlerror () != NULL) + their handlers. */ +#define DLSYM(f) \ + do \ + { \ + device->f##_func = dlsym (device->plugin_handle, #f); \ + err = dlerror (); \ + if (err != NULL) \ + goto out; \ + } \ + while (0) + DLSYM (device_available); + DLSYM (device_alloc); + DLSYM (device_free); + DLSYM (device_dev2host); + DLSYM (device_host2dev); +#undef DLSYM + + out: + if (err != NULL) { + gomp_error ("while loading %s: %s", plugin_name, err); dlclose (device->plugin_handle); - return false; } - - return true; + return err == NULL; } /* This functions scans folder, specified in environment variable @@ -674,7 +699,6 @@ gomp_find_available_plugins (void) if (devices == NULL) { num_devices = 0; - closedir (dir); goto out; } @@ -684,26 +708,10 @@ gomp_find_available_plugins (void) gomp_mutex_init (&devices[num_devices].dev_env_lock); num_devices++; } - closedir (dir); out: - /* FIXME: Temporary hack for testing non-shared address spaces on host. - We create device 257 just to check memory mapping. 
*/ - if (num_devices == 0) - { - num_devices = 1; - devices = malloc (sizeof (struct gomp_device_descr)); - if (devices == NULL) - { - num_devices = 0; - return; - } - devices[0].plugin_handle = NULL; - devices[0].device_available_func = NULL; - devices[0].dev_splay_tree.root = NULL; - gomp_mutex_init (&devices[0].dev_env_lock); - } - devices[0].id = 257; + if (dir) + closedir (dir); } /* This function initializes runtime needed for offloading. Grüße, Thomas
pgpEOibMs9aIU.pgp
Description: PGP signature