On 26 Jan 14:44, Thomas Schwinge wrote: > On 17 Jan 02:16, Ilya Verbin wrote: > > Such things are not covered by the > > testsuite, that's why you missed this issue. Here is a simple testcase: > > <http://news.gmane.org/find-root.php?message_id=%3C20150116231632.GB48380%40msticlxl57.ims.intel.com%3E> > > Probably a good motivation for adding such a test case. ;-)
I thought about it, but I don't know how to compile 2 binaries and run one of them using dejagnu. > > So, you don't assume that a device can have multiple images from multiple > > libs? > > This probably is "just" a bug that we introduced with our changes? > (Julian?) > > > Also, could you please explain, why did you divide a device initialization > > into > > two functions -- gomp_init_device and gomp_init_tables? > > As I understand it (again, Julian, please correct me if I got that > wrong), the reason is that for OpenACC support, we need these as two > separate (independent) actions. Is this causing problems for OpenMP > offloading? I'm asking since in this patch http://gcc.gnu.org/ml/gcc-patches/2014-11/msg01604.html I tried to change libgomp<->plugin interface to enable offloading from libs, loaded at any time. My proposal was to replace GOMP_OFFLOAD_register_image and GOMP_OFFLOAD_get_table with GOMP_OFFLOAD_[un]load_image. When target device is initialized, GOMP_OFFLOAD_load_image registers one image in the plugin and returns corresponding target addresses for the image. The mapping between host and target addresses happens as previously. I hope that this approach is suitable for both MIC and PTX. Here is my current patch, it works for OpenMP->MIC, but obviously will not work for PTX, since it requires symmetrical changes in the plugin. Could you please take a look, whether it is possible to support this new interface in PTX plugin? diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index d9cbff5..1072ae4 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -51,14 +51,12 @@ enum offload_target_type OFFLOAD_TARGET_TYPE_INTEL_MIC = 6 }; -/* Auxiliary struct, used for transferring a host-target address range mapping - from plugin to libgomp. */ -struct mapping_table +/* Auxiliary struct, used for transferring pairs of addresses from plugin + to libgomp. */ +struct addr_pair { - uintptr_t host_start; - uintptr_t host_end; - uintptr_t tgt_start; - uintptr_t tgt_end; + uintptr_t start; + uintptr_t end; }; /* Miscellaneous functions. */ diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 3089401..4e021f9 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -773,10 +773,10 @@ struct gomp_device_descr unsigned int (*get_caps_func) (void); int (*get_type_func) (void); int (*get_num_devices_func) (void); - void (*register_image_func) (void *, void *); void (*init_device_func) (int); void (*fini_device_func) (int); - int (*get_table_func) (int, struct mapping_table **); + int (*load_image_func) (int, void *, struct addr_pair **); + void (*unload_image_func) (int, void *); void *(*alloc_func) (int, size_t); void (*free_func) (int, void *); void *(*dev2host_func) (int, void *, const void *, size_t); @@ -793,9 +793,6 @@ struct gomp_device_descr /* Set to true when device is initialized. */ bool is_initialized; - /* True when offload regions have been registered with this device. */ - bool offload_regions_registered; - /* OpenACC-specific data and functions. */ /* This is mutable because of its mutable data_environ and target_data members. */ diff --git a/libgomp/libgomp.map b/libgomp/libgomp.map index f44174e..2b2b953 100644 --- a/libgomp/libgomp.map +++ b/libgomp/libgomp.map @@ -231,6 +231,7 @@ GOMP_4.0 { GOMP_4.0.1 { global: GOMP_offload_register; + GOMP_offload_unregister; } GOMP_4.0; OACC_2.0 { diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 6aeb1e7..5d67c6c 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -43,10 +43,10 @@ static struct gomp_device_descr host_dispatch = .get_caps_func = GOMP_OFFLOAD_get_caps, .get_type_func = GOMP_OFFLOAD_get_type, .get_num_devices_func = GOMP_OFFLOAD_get_num_devices, - .register_image_func = GOMP_OFFLOAD_register_image, .init_device_func = GOMP_OFFLOAD_init_device, .fini_device_func = GOMP_OFFLOAD_fini_device, - .get_table_func = GOMP_OFFLOAD_get_table, + .load_image_func = GOMP_OFFLOAD_load_image, + .unload_image_func = GOMP_OFFLOAD_unload_image, .alloc_func = GOMP_OFFLOAD_alloc, .free_func = GOMP_OFFLOAD_free, .dev2host_func = GOMP_OFFLOAD_dev2host, @@ -56,7 +56,6 @@ static struct gomp_device_descr host_dispatch = .mem_map.is_initialized = false, .mem_map.splay_tree.root = NULL, .is_initialized = false, - .offload_regions_registered = false, .openacc = { .open_device_func = GOMP_OFFLOAD_openacc_open_device, diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 166eb55..19b937a 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -284,12 +284,6 @@ lazy_open (int ord) = acc_dev->openacc.create_thread_data_func (acc_dev->openacc.target_data); acc_dev->openacc.async_set_async_func (acc_async_sync); - - struct gomp_memory_mapping *mem_map = &acc_dev->mem_map; - gomp_mutex_lock (&mem_map->lock); - if (!mem_map->is_initialized) - gomp_init_tables (acc_dev, mem_map); - gomp_mutex_unlock (&mem_map->lock); } /* OpenACC 2.0a (3.2.12, 3.2.13) doesn't specify whether the serialization of diff --git a/libgomp/plugin/plugin-host.c b/libgomp/plugin/plugin-host.c index ebf7f11..bc60f72 100644 --- a/libgomp/plugin/plugin-host.c +++ b/libgomp/plugin/plugin-host.c @@ -95,12 +95,6 @@ GOMP_OFFLOAD_get_num_devices (void) } STATIC void -GOMP_OFFLOAD_register_image (void *host_table __attribute__ ((unused)), - void *target_data __attribute__ ((unused))) -{ -} - -STATIC void GOMP_OFFLOAD_init_device (int n __attribute__ ((unused))) { } @@ -111,12 +105,19 @@ GOMP_OFFLOAD_fini_device (int n __attribute__ ((unused))) } STATIC int -GOMP_OFFLOAD_get_table (int n __attribute__ ((unused)), - struct mapping_table **table __attribute__ ((unused))) +GOMP_OFFLOAD_load_image (int n __attribute__ ((unused)), + void *i __attribute__ ((unused)), + struct addr_pair **r __attribute__ ((unused))) { return 0; } +STATIC void +GOMP_OFFLOAD_unload_image (int n __attribute__ ((unused)), + void *i __attribute__ ((unused))) +{ +} + STATIC void * GOMP_OFFLOAD_openacc_open_device (int n) { diff --git a/libgomp/target.c b/libgomp/target.c index ebff55e..ce2017c 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -635,7 +635,84 @@ gomp_update (struct gomp_device_descr *devicep, struct gomp_memory_mapping *mm, gomp_mutex_unlock (&mm->lock); } -/* This function should be called from every offload image. + +/* Insert mapping of host -> target address pairs to splay tree. */ + +static void +gomp_splay_tree_insert_mapping (struct gomp_device_descr *devicep, + struct addr_pair *host_addr, + struct addr_pair *tgt_addr) +{ + struct gomp_memory_mapping *mm = &devicep->mem_map; + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); + tgt->refcount = 1; + tgt->array = gomp_malloc (sizeof (*tgt->array)); + tgt->tgt_start = tgt_addr->start; + tgt->tgt_end = tgt_addr->end; + tgt->to_free = NULL; + tgt->list_count = 0; + tgt->device_descr = devicep; + splay_tree_node node = tgt->array; + splay_tree_key k = &node->key; + k->host_start = host_addr->start; + k->host_end = host_addr->end; + k->tgt_offset = 0; + k->refcount = 1; + k->copy_from = false; + k->tgt = tgt; + node->left = NULL; + node->right = NULL; + splay_tree_insert (&mm->splay_tree, node); +} + +/* Load image pointed by TARGET_DATA to the device, specified by DEVICEP. + And insert to splay tree the mapping between addresses from HOST_TABLE and + from loaded target image. */ + +static void +gomp_offload_image_to_device (struct gomp_device_descr *devicep, + void *host_table, void *target_data) +{ + void **host_func_table = ((void ***) host_table)[0]; + void **host_funcs_end = ((void ***) host_table)[1]; + void **host_var_table = ((void ***) host_table)[2]; + void **host_vars_end = ((void ***) host_table)[3]; + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. */ + int num_funcs = host_funcs_end - host_func_table; + int num_vars = (host_vars_end - host_var_table) / 2; + + /* Load image to device and get target addresses for the image. */ + struct addr_pair *target_table = NULL; + int i, num_target_entries + = devicep->load_image_func (devicep->target_id, target_data, &target_table); + + if (num_target_entries != num_funcs + num_vars) + gomp_fatal ("Can't map target functions or variables"); + + /* Insert host-target address mapping into devicep->dev_splay_tree. */ + for (i = 0; i < num_funcs; i++) + { + struct addr_pair host_addr; + host_addr.start = (uintptr_t) host_func_table[i]; + host_addr.end = host_addr.start + 1; + gomp_splay_tree_insert_mapping (devicep, &host_addr, &target_table[i]); + } + + for (i = 0; i < num_vars; i++) + { + struct addr_pair host_addr; + host_addr.start = (uintptr_t) host_var_table[i*2]; + host_addr.end = host_addr.start + (uintptr_t) host_var_table[i*2+1]; + gomp_splay_tree_insert_mapping (devicep, &host_addr, + &target_table[num_funcs+i]); + } + + free (target_table); +} + +/* This function should be called from every offload image while loading. It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of the target, and TARGET_DATA needed by target plugin. */ @@ -643,6 +720,17 @@ void GOMP_offload_register (void *host_table, enum offload_target_type target_type, void *target_data) { + int i; + + /* Load image to all initialized devices. */ + for (i = 0; i < num_devices; i++) + { + struct gomp_device_descr *devicep = &devices[i]; + if (devicep->type == target_type && devicep->is_initialized) + gomp_offload_image_to_device (devicep, host_table, target_data); + } + + /* Insert image to array of pending images. */ offload_images = gomp_realloc (offload_images, (num_offload_images + 1) * sizeof (struct offload_image_descr)); @@ -654,54 +742,83 @@ GOMP_offload_register (void *host_table, enum offload_target_type target_type, num_offload_images++; } -/* This function initializes the target device, specified by DEVICEP. DEVICEP - must be locked on entry, and remains locked on return. */ +/* This function should be called from every offload image while unloading. + It gets the descriptor of the host func and var tables HOST_TABLE, TYPE of + the target, and TARGET_DATA needed by target plugin. */ -attribute_hidden void -gomp_init_device (struct gomp_device_descr *devicep) +void +GOMP_offload_unregister (void *host_table, enum offload_target_type target_type, + void *target_data) { - devicep->init_device_func (devicep->target_id); - devicep->is_initialized = true; + void **host_func_table = ((void ***) host_table)[0]; + void **host_funcs_end = ((void ***) host_table)[1]; + void **host_var_table = ((void ***) host_table)[2]; + void **host_vars_end = ((void ***) host_table)[3]; + int i; + + /* The func table contains only addresses, the var table contains addresses + and corresponding sizes. */ + int num_funcs = host_funcs_end - host_func_table; + int num_vars = (host_vars_end - host_var_table) / 2; + + /* Unload image from all initialized devices. */ + for (i = 0; i < num_devices; i++) + { + int j; + struct gomp_device_descr *devicep = &devices[i]; + struct gomp_memory_mapping *mm = &devicep->mem_map; + + if (devicep->type != target_type || !devicep->is_initialized) + continue; + + devicep->unload_image_func (devicep->target_id, target_data); + + /* Remove mapping from splay tree. */ + for (j = 0; j < num_funcs; j++) + { + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_func_table[j]; + k.host_end = k.host_start + 1; + splay_tree_remove (&mm->splay_tree, &k); + } + + for (j = 0; j < num_vars; j++) + { + struct splay_tree_key_s k; + k.host_start = (uintptr_t) host_var_table[j*2]; + k.host_end = k.host_start + (uintptr_t) host_var_table[j*2+1]; + splay_tree_remove (&mm->splay_tree, &k); + } + } + + /* Remove image from array of pending images. */ + for (i = 0; i < num_offload_images; i++) + if (offload_images[i].target_data == target_data) + { + offload_images[i] = offload_images[--num_offload_images]; + break; + } } -/* Initialize address mapping tables. MM must be locked on entry, and remains - locked on return. */ +/* This function initializes the target device, specified by DEVICEP. DEVICEP + must be locked on entry, and remains locked on return. */ attribute_hidden void -gomp_init_tables (struct gomp_device_descr *devicep, - struct gomp_memory_mapping *mm) +gomp_init_device (struct gomp_device_descr *devicep) { - /* Get address mapping table for device. */ - struct mapping_table *table = NULL; - int num_entries = devicep->get_table_func (devicep->target_id, &table); - - /* Insert host-target address mapping into dev_splay_tree. */ int i; - for (i = 0; i < num_entries; i++) + devicep->init_device_func (devicep->target_id); + + /* Load to device all images registered by the moment. */ + for (i = 0; i < num_offload_images; i++) { - struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); - tgt->refcount = 1; - tgt->array = gomp_malloc (sizeof (*tgt->array)); - tgt->tgt_start = table[i].tgt_start; - tgt->tgt_end = table[i].tgt_end; - tgt->to_free = NULL; - tgt->list_count = 0; - tgt->device_descr = devicep; - splay_tree_node node = tgt->array; - splay_tree_key k = &node->key; - k->host_start = table[i].host_start; - k->host_end = table[i].host_end; - k->tgt_offset = 0; - k->refcount = 1; - k->copy_from = false; - k->tgt = tgt; - node->left = NULL; - node->right = NULL; - splay_tree_insert (&mm->splay_tree, node); + struct offload_image_descr *image = &offload_images[i]; + if (image->type == devicep->type) + gomp_offload_image_to_device (devicep, image->host_table, + image->target_data); } - free (table); - mm->is_initialized = true; + devicep->is_initialized = true; } /* Free address mapping tables. MM must be locked on entry, and remains locked @@ -750,6 +867,7 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, unsigned char *kinds) { struct gomp_device_descr *devicep = resolve_device (device); + struct gomp_memory_mapping *mm = &devicep->mem_map; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) @@ -780,21 +898,12 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, fn_addr = (void *) fn; else { - struct gomp_memory_mapping *mm = &devicep->mem_map; - gomp_mutex_lock (&mm->lock); - - if (!mm->is_initialized) - gomp_init_tables (devicep, mm); - struct splay_tree_key_s k; k.host_start = (uintptr_t) fn; k.host_end = k.host_start + 1; splay_tree_key tgt_fn = splay_tree_lookup (&mm->splay_tree, &k); if (tgt_fn == NULL) gomp_fatal ("Target function wasn't mapped"); - - gomp_mutex_unlock (&mm->lock); - fn_addr = (void *) tgt_fn->tgt->tgt_start; } @@ -845,12 +954,6 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, gomp_init_device (devicep); gomp_mutex_unlock (&devicep->lock); - struct gomp_memory_mapping *mm = &devicep->mem_map; - gomp_mutex_lock (&mm->lock); - if (!mm->is_initialized) - gomp_init_tables (devicep, mm); - gomp_mutex_unlock (&mm->lock); - struct target_mem_desc *tgt = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, false); @@ -886,13 +989,7 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, gomp_init_device (devicep); gomp_mutex_unlock (&devicep->lock); - struct gomp_memory_mapping *mm = &devicep->mem_map; - gomp_mutex_lock (&mm->lock); - if (!mm->is_initialized) - gomp_init_tables (devicep, mm); - gomp_mutex_unlock (&mm->lock); - - gomp_update (devicep, mm, mapnum, hostaddrs, sizes, kinds, false); + gomp_update (devicep, &devicep->mem_map, mapnum, hostaddrs, sizes, kinds, false); } void @@ -961,10 +1058,10 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, DLSYM (get_caps); DLSYM (get_type); DLSYM (get_num_devices); - DLSYM (register_image); DLSYM (init_device); DLSYM (fini_device); - DLSYM (get_table); + DLSYM (load_image); + DLSYM (unload_image); DLSYM (alloc); DLSYM (free); DLSYM (dev2host); @@ -1027,22 +1124,6 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, return err == NULL; } -/* This function adds a compatible offload image IMAGE to an accelerator device - DEVICE. DEVICE must be locked on entry, and remains locked on return. */ - -static void -gomp_register_image_for_device (struct gomp_device_descr *device, - struct offload_image_descr *image) -{ - if (!device->offload_regions_registered - && (device->type == image->type - || device->type == OFFLOAD_TARGET_TYPE_HOST)) - { - device->register_image_func (image->host_table, image->target_data); - device->offload_regions_registered = true; - } -} - /* This function initializes the runtime needed for offloading. It parses the list of offload targets and tries to load the plugins for these targets. On return, the variables NUM_DEVICES and NUM_DEVICES_OPENMP @@ -1104,7 +1185,6 @@ gomp_target_init (void) current_device.mem_map.is_initialized = false; current_device.mem_map.splay_tree.root = NULL; current_device.is_initialized = false; - current_device.offload_regions_registered = false; current_device.openacc.data_environ = NULL; current_device.openacc.target_data = NULL; for (i = 0; i < new_num_devices; i++) @@ -1146,21 +1226,12 @@ gomp_target_init (void) for (i = 0; i < num_devices; i++) { - int j; - - for (j = 0; j < num_offload_images; j++) - gomp_register_image_for_device (&devices[i], &offload_images[j]); - /* The 'devices' array can be moved (by the realloc call) until we have found all the plugins, so registering with the OpenACC runtime (which takes a copy of the pointer argument) must be delayed until now. */ if (devices[i].capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) goacc_register (&devices[i]); } - - free (offload_images); - offload_images = NULL; - num_offload_images = 0; } #else /* PLUGIN_SUPPORT */ Thanks, -- Ilya