This is the main set of improvements to reference-counting behaviour (see parent email for further details).
ChangeLog libgomp/ * libgomp.h (splay_tree_key_s): Substitute dynamic_refcount field for virtual_refcount. (acc_dispatch_t): Remove data_environ field. (gomp_acc_insert_pointer, gomp_acc_data_env_remove_tgt): Remove prototypes. (gomp_acc_remove_pointer): Update prototype. * oacc-async.c (goacc_remove_var_async): New function. * oacc-host.c (host_dispatch): Don't initialise removed data_environ field. * oacc-init.c (acc_shutdown_1): Use gomp_remove_var instead of gomp_unmap_vars to remove mappings by splay tree key instead of target memory descriptor. * oacc-int.h (splay_tree_key_s): Add forward declaration. (goacc_remove_car_async): Add prototype. * oacc-mem.c (gomp_acc_data_env_remove, gomp_acc_data_env_remove_tgt): Remove functions. (present_create_copy): Use virtual_refcount instead of dynamic_refcount, and don't modify after calling gomp_map_vars_async. Don't create dummy target_mem_desc. Fix target pointer return value. (delete_copyout): Update for virtual_refcount semantics. Use goacc_remove_var_async for asynchronous delete/copyouts. (gomp_acc_insert_pointer): Remove function. (gomp_acc_remove_pointer): Use virtual_refcount semantics. * oacc-parallel.c (find_pointer): Add missing GOMP_MAP_FORCE_DETACH case. (GOACC_enter_exit_data): Fix struct mapping/unmapping for virtual_refcount semantics. Fix attach/detach behaviour. Don't call gomp_acc_insert_pointer. * target.c (gomp_map_vars_existing): Fix initialisation of do_detach field. (gomp_map_vars_async): Handle GOMP_MAP_VARS_OPENACC_ENTER_DATA. Update for virtual_refcount semantics. Add some missing initialisations in dynamic array code paths. (gomp_unmap_tgt): Don't call gomp_acc_data_env_remove_tgt. (gomp_remove_var): Fix use-after-free. (gomp_unmap_vars_async): Update for virtual_refcount semantics. (gomp_load_image_to_device): Don't use tgt's variable list to store static function and variable mappings. Initialise virtual refcount. (gomp_target_init): Don't initialise removed data_environ field. * testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c: Update test for fixed refcount behaviour. * testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c: Likewise. --- libgomp/libgomp.h | 22 +-- libgomp/oacc-async.c | 18 ++ libgomp/oacc-host.c | 2 - libgomp/oacc-init.c | 6 +- libgomp/oacc-int.h | 5 + libgomp/oacc-mem.c | 206 +++++--------------- libgomp/oacc-parallel.c | 127 ++++++------- libgomp/target.c | 63 ++++--- .../libgomp.oacc-c-c++-common/deep-copy-7.c | 11 +- .../libgomp.oacc-c-c++-common/deep-copy-8.c | 1 + 10 files changed, 189 insertions(+), 272 deletions(-)
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 568e260..ea44afc 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -860,8 +860,11 @@ struct splay_tree_key_s { uintptr_t tgt_offset; /* Reference count. */ uintptr_t refcount; - /* Dynamic reference count. */ - uintptr_t dynamic_refcount; + /* Reference counts beyond those that represent genuine references in the + linked splay tree key/target memory structures, e.g. for multiple OpenACC + "present increment" operations (via "acc enter data") refering to the same + host-memory block. */ + uintptr_t virtual_refcount; /* For a block with attached pointers, the attachment counters for each. */ unsigned short *attach_count; /* Pointer to the original mapping of "omp declare target link" object. */ @@ -887,13 +890,6 @@ splay_compare (splay_tree_key x, splay_tree_key y) typedef struct acc_dispatch_t { - /* This is a linked list of data mapped using the - acc_map_data/acc_unmap_data or "acc enter data"/"acc exit data" pragmas. - Unlike mapped_data in the goacc_thread struct, unmapping can - happen out-of-order with respect to mapping. */ - /* This is guarded by the lock in the "outer" struct gomp_device_descr. */ - struct target_mem_desc *data_environ; - /* Execute. */ __typeof (GOMP_OFFLOAD_openacc_exec) *exec_func; __typeof (GOMP_OFFLOAD_openacc_exec_params) *exec_params_func; @@ -1010,9 +1006,9 @@ enum gomp_map_vars_kind struct gomp_coalesce_buf; -extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *, int); -extern void gomp_acc_remove_pointer (void **, size_t *, unsigned short *, - int, void *, bool, int); +extern void gomp_acc_remove_pointer (struct gomp_device_descr *, void **, + size_t *, unsigned short *, int, bool, + int); extern void gomp_acc_declare_allocate (bool, size_t, void **, size_t *, unsigned short *); struct gomp_coalesce_buf; @@ -1041,8 +1037,6 @@ extern struct target_mem_desc *gomp_map_vars_async (struct gomp_device_descr *, size_t, void **, void **, size_t *, void *, bool, enum gomp_map_vars_kind); -extern void gomp_acc_data_env_remove_tgt (struct target_mem_desc **, - struct target_mem_desc *); extern void gomp_unmap_tgt (struct target_mem_desc *); extern void gomp_unmap_vars (struct target_mem_desc *, bool); extern void gomp_unmap_vars_async (struct target_mem_desc *, bool, diff --git a/libgomp/oacc-async.c b/libgomp/oacc-async.c index bb00279..be47222 100644 --- a/libgomp/oacc-async.c +++ b/libgomp/oacc-async.c @@ -385,6 +385,24 @@ goacc_async_copyout_unmap_vars (struct target_mem_desc *tgt, (void *) tgt); } +/* Remove a variable asynchronously. This actually removes the variable + mapping immediately, but retains the linked target_mem_desc until the + asynchronous operation has completed (as it may still refer to target + memory). The device lock must be held before entry, and remains locked on + exit. */ + +attribute_hidden void +goacc_remove_var_async (struct gomp_device_descr *devicep, splay_tree_key n, + struct goacc_asyncqueue *aq) +{ + struct target_mem_desc *tgt = n->tgt; + assert (tgt); + tgt->refcount++; + gomp_remove_var (devicep, n); + devicep->openacc.async.queue_callback_func (aq, goacc_async_unmap_tgt, + (void *) tgt); +} + attribute_hidden void goacc_async_free (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, void *ptr) diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 4521fff..00bc5f6 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -286,8 +286,6 @@ static struct gomp_device_descr host_dispatch = .state = GOMP_DEVICE_UNINITIALIZED, .openacc = { - .data_environ = NULL, - .exec_func = host_openacc_exec, .exec_params_func = host_openacc_exec_params, diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 48c9646..5e38d01 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -389,9 +389,9 @@ acc_shutdown_1 (acc_device_t d) { while (walk->dev->mem_map.root) { - struct target_mem_desc *tgt = walk->dev->mem_map.root->key.tgt; - - gomp_unmap_vars (tgt, false); + splay_tree_key k = &walk->dev->mem_map.root->key; + k->link_key = NULL; + gomp_remove_var (walk->dev, k); } walk->dev = NULL; diff --git a/libgomp/oacc-int.h b/libgomp/oacc-int.h index 1f6c62c..d903065 100644 --- a/libgomp/oacc-int.h +++ b/libgomp/oacc-int.h @@ -109,10 +109,15 @@ void goacc_restore_bind (void); void goacc_lazy_initialize (void); void goacc_host_init (void); +struct splay_tree_key_s; + void goacc_init_asyncqueues (struct gomp_device_descr *); bool goacc_fini_asyncqueues (struct gomp_device_descr *); void goacc_async_copyout_unmap_vars (struct target_mem_desc *, struct goacc_asyncqueue *); +void goacc_remove_var_async (struct gomp_device_descr *devicep, + struct splay_tree_key_s *n, + struct goacc_asyncqueue *aq); void goacc_async_free (struct gomp_device_descr *, struct goacc_asyncqueue *, void *); struct goacc_asyncqueue *get_goacc_asyncqueue (int); diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3202f06..9b70820 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -439,77 +439,6 @@ acc_map_data (void *h, void *d, size_t s) } } -/* Remove the target_mem_desc holding the mapping for MAPNUM HOSTADDRS from - the OpenACC data environment pointed to by DATA_ENV. The device lock - should be held before calling, and remains locked on exit. */ - -static void -gomp_acc_data_env_remove (struct gomp_device_descr *acc_dev, - struct target_mem_desc **data_env, void **hostaddrs, - int mapnum) -{ - struct target_mem_desc *t, *tp; - - for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev) - { - bool all_match = true; - - /* We must locate the target descriptor by "value", matching each - hostaddr that it describes. */ - if (t->list_count != mapnum) - continue; - - for (int i = 0; i < t->list_count; i++) - if (t->list[i].key - && (t->list[i].key->host_start + t->list[i].offset - != (uintptr_t) hostaddrs[i])) - { - all_match = false; - break; - } - - if (all_match) - { - if (t->refcount > 1) - t->refcount--; - else - { - if (tp) - tp->prev = t->prev; - else - *data_env = t->prev; - } - return; - } - } - - gomp_mutex_unlock (&acc_dev->lock); - gomp_fatal ("cannot find data mapping to remove in data environment"); -} - -/* Similar, but removes target_mem_desc REMOVE from the DATA_ENV, in case its - reference count drops to zero resulting in it being unmapped (in - target.c:gomp_unmap_tgt). Unlike the above function it is not an error if - REMOVE is not present in the environment. The device lock should be held - before calling, and remains locked on exit. */ - -attribute_hidden void -gomp_acc_data_env_remove_tgt (struct target_mem_desc **data_env, - struct target_mem_desc *remove) -{ - struct target_mem_desc *t, *tp; - - for (tp = NULL, t = *data_env; t != NULL; tp = t, t = t->prev) - if (t == remove) - { - if (tp) - tp->prev = t->prev; - else - *data_env = t->prev; - return; - } -} - void acc_unmap_data (void *h) { @@ -626,26 +555,9 @@ present_create_copy (unsigned f, void *h, size_t s, int async) if (n->refcount != REFCOUNT_INFINITY) { n->refcount++; - n->dynamic_refcount++; + n->virtual_refcount++; } - struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt) - + sizeof (tgt->list[0])); - tgt->refcount = 1; - tgt->tgt_start = 0; - tgt->tgt_end = 0; - tgt->to_free = NULL; - tgt->prev = acc_dev->openacc.data_environ; - tgt->list_count = 1; - tgt->device_descr = acc_dev; - tgt->list[0].key = n; - tgt->list[0].copy_from = false; - tgt->list[0].always_copy_from = false; - tgt->list[0].do_detach = false; - tgt->list[0].offset = (uintptr_t) h - n->host_start; - tgt->list[0].length = 0; - acc_dev->openacc.data_environ = tgt; - gomp_mutex_unlock (&acc_dev->lock); } else if (!(f & FLAG_CREATE)) @@ -655,7 +567,6 @@ present_create_copy (unsigned f, void *h, size_t s, int async) } else { - struct target_mem_desc *tgt; size_t mapnum = 1; unsigned short kinds; void *hostaddrs = h; @@ -669,20 +580,15 @@ present_create_copy (unsigned f, void *h, size_t s, int async) goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, - &kinds, true, - GOMP_MAP_VARS_OPENACC_ENTER_DATA); - - for (int i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key) - tgt->list[i].key->dynamic_refcount++; + gomp_map_vars_async (acc_dev, aq, mapnum, &hostaddrs, NULL, &s, &kinds, + true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); gomp_mutex_lock (&acc_dev->lock); - tgt->prev = acc_dev->openacc.data_environ; - acc_dev->openacc.data_environ = tgt; + n = lookup_host (acc_dev, h, s); + assert (n != NULL); + d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h + - n->host_start); gomp_mutex_unlock (&acc_dev->lock); - - d = tgt->to_free; } if (profiling_setup_p) @@ -765,7 +671,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) { size_t host_size; splay_tree_key n; - void *d; struct goacc_thread *thr = goacc_thread (); struct gomp_device_descr *acc_dev = thr->dev; @@ -797,9 +702,6 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) gomp_fatal ("[%p,%d] is not mapped", (void *)h, (int)s); } - d = (void *) (n->tgt->tgt_start + n->tgt_offset - + (uintptr_t) h - n->host_start); - host_size = n->host_end - n->host_start; if (n->host_start != (uintptr_t) h || host_size != s) @@ -812,29 +714,37 @@ delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) if (n->refcount == REFCOUNT_INFINITY) { n->refcount = 0; - n->dynamic_refcount = 0; - n->attach_count = NULL; + n->virtual_refcount = 0; } if (f & FLAG_FINALIZE) { - n->refcount -= n->dynamic_refcount; - n->dynamic_refcount = 0; + n->refcount -= n->virtual_refcount; + n->virtual_refcount = 0; } - else if (n->dynamic_refcount) + + if (n->virtual_refcount > 0) { - n->dynamic_refcount--; n->refcount--; + n->virtual_refcount--; } + else if (n->refcount > 0) + n->refcount--; if (n->refcount == 0) { + goacc_aq aq = get_goacc_asyncqueue (async); + if (f & FLAG_COPYOUT) - { - goacc_aq aq = get_goacc_asyncqueue (async); + { + void *d = (void *) (n->tgt->tgt_start + n->tgt_offset + + (uintptr_t) h - n->host_start); gomp_copy_dev2host (acc_dev, aq, h, d, s); } - gomp_remove_var (acc_dev, n); + if (aq) + goacc_remove_var_async (acc_dev, n, aq); + else + gomp_remove_var (acc_dev, n); } gomp_mutex_unlock (&acc_dev->lock); @@ -1003,53 +913,15 @@ gomp_acc_declare_allocate (bool allocate, size_t mapnum, void **hostaddrs, } void -gomp_acc_insert_pointer (size_t mapnum, void **hostaddrs, size_t *sizes, - void *kinds, int async) +gomp_acc_remove_pointer (struct gomp_device_descr *acc_dev, void **hostaddrs, + size_t *sizes, unsigned short *kinds, int async, + bool finalize, int mapnum) { - struct target_mem_desc *tgt; - struct goacc_thread *thr = goacc_thread (); - struct gomp_device_descr *acc_dev = thr->dev; - - gomp_debug (0, " %s: prepare mappings\n", __FUNCTION__); - goacc_aq aq = get_goacc_asyncqueue (async); - tgt = gomp_map_vars_async (acc_dev, aq, mapnum, hostaddrs, - NULL, sizes, kinds, true, - GOMP_MAP_VARS_OPENACC_ENTER_DATA); - gomp_debug (0, " %s: mappings prepared\n", __FUNCTION__); - - for (size_t i = 0; i < tgt->list_count; i++) - if (tgt->list[i].key) - tgt->list[i].key->dynamic_refcount++; - - gomp_mutex_lock (&acc_dev->lock); - tgt->prev = acc_dev->openacc.data_environ; - acc_dev->openacc.data_environ = tgt; - gomp_mutex_unlock (&acc_dev->lock); -} - -void -gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds, - int async, void *detach_from, bool finalize, - int mapnum) -{ - struct goacc_thread *thr = goacc_thread (); - struct gomp_device_descr *acc_dev = thr->dev; struct splay_tree_key_s cur_node; splay_tree_key n; gomp_mutex_lock (&acc_dev->lock); - if (detach_from) - { - splay_tree_key n2 = lookup_host (acc_dev, detach_from, 1); - goacc_aq aq = get_goacc_asyncqueue (async); - gomp_detach_pointer (acc_dev, aq, n2, (uintptr_t) detach_from, finalize, - NULL); - } - - gomp_acc_data_env_remove (acc_dev, &acc_dev->openacc.data_environ, hostaddrs, - mapnum); - for (int i = 0; i < mapnum; i++) { int kind = kinds[i] & 0xff; @@ -1062,6 +934,7 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds, case GOMP_MAP_ALWAYS_FROM: copyfrom = true; /* Fallthrough. */ + case GOMP_MAP_TO_PSET: case GOMP_MAP_POINTER: case GOMP_MAP_DELETE: @@ -1075,27 +948,41 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds, || kind == GOMP_MAP_POINTER) ? sizeof (void *) : sizes[i]); n = splay_tree_lookup (&acc_dev->mem_map, &cur_node); + if (n == NULL) continue; + + if (n->refcount == REFCOUNT_INFINITY) + { + n->refcount = 1; + n->virtual_refcount = 0; + } + if (finalize) { - n->refcount -= n->dynamic_refcount; - n->dynamic_refcount = 0; + n->refcount -= n->virtual_refcount; + n->virtual_refcount = 0; } - else if (n->refcount > 0 && n->refcount != REFCOUNT_INFINITY) + + if (n->virtual_refcount > 0) { n->refcount--; - n->dynamic_refcount--; + n->virtual_refcount--; } + else if (n->refcount > 0) + n->refcount--; + if (copyfrom) gomp_copy_dev2host (acc_dev, NULL, (void *) cur_node.host_start, (void *) (n->tgt->tgt_start + n->tgt_offset + cur_node.host_start - n->host_start), cur_node.host_end - cur_node.host_start); + if (n->refcount == 0) gomp_remove_var (acc_dev, n); break; + default: gomp_mutex_unlock (&acc_dev->lock); gomp_fatal ("gomp_acc_remove_pointer unhandled kind 0x%.2x", @@ -1103,7 +990,6 @@ gomp_acc_remove_pointer (void **hostaddrs, size_t *sizes, unsigned short *kinds, } } - gomp_mutex_unlock (&acc_dev->lock); } diff --git a/libgomp/oacc-parallel.c b/libgomp/oacc-parallel.c index 8a3c65b..a4487b8 100644 --- a/libgomp/oacc-parallel.c +++ b/libgomp/oacc-parallel.c @@ -69,7 +69,8 @@ find_pointer (int pos, size_t mapnum, unsigned short *kinds) if (kind1 == GOMP_MAP_POINTER || kind1 == GOMP_MAP_ALWAYS_POINTER || kind1 == GOMP_MAP_ATTACH - || kind1 == GOMP_MAP_DETACH) + || kind1 == GOMP_MAP_DETACH + || kind1 == GOMP_MAP_FORCE_DETACH) return 2; else if (kind1 == GOMP_MAP_TO_PSET) return 3; @@ -847,42 +848,10 @@ GOACC_enter_exit_data (int device, size_t mapnum, case GOMP_MAP_STRUCT: { int elems = sizes[i]; - struct splay_tree_key_s k; - splay_tree_key str; - uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1]; - uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems] - + sizes[i + elems]; - k.host_start = elems_lo; - k.host_end = elems_hi; - gomp_mutex_lock (&acc_dev->lock); - str = splay_tree_lookup (&acc_dev->mem_map, &k); - gomp_mutex_unlock (&acc_dev->lock); - if (str == NULL) - { - size_t mapsize = elems_hi - elems_lo; - goacc_aq aq = get_goacc_asyncqueue (async); - struct target_mem_desc *tgt; - unsigned short thiskind = GOMP_MAP_ALLOC; - int j; - for (j = 0; j < elems; j++) - if ((kinds[i + j] & 0xff) != GOMP_MAP_ALLOC) - { - thiskind = GOMP_MAP_TO; - break; - } - tgt = gomp_map_vars_async (acc_dev, aq, 1, - &hostaddrs[i + 1], NULL, &mapsize, &thiskind, - true, GOMP_MAP_VARS_OPENACC_ENTER_DATA); - - for (j = 0; j < tgt->list_count; j++) - if (tgt->list[j].key) - tgt->list[j].key->dynamic_refcount++; - - gomp_mutex_lock (&acc_dev->lock); - tgt->prev = acc_dev->openacc.data_environ; - acc_dev->openacc.data_environ = tgt; - gomp_mutex_unlock (&acc_dev->lock); - } + goacc_aq aq = get_goacc_asyncqueue (async); + gomp_map_vars_async (acc_dev, aq, elems + 1, &hostaddrs[i], + NULL, &sizes[i], &kinds[i], true, + GOMP_MAP_VARS_OPENACC_ENTER_DATA); i += elems; } break; @@ -898,8 +867,15 @@ GOACC_enter_exit_data (int device, size_t mapnum, gomp_acc_declare_allocate (true, pointer, &hostaddrs[i], &sizes[i], &kinds[i]); else - gomp_acc_insert_pointer (pointer, &hostaddrs[i], - &sizes[i], &kinds[i], async); + { + goacc_aq aq = get_goacc_asyncqueue (async); + for (int j = 0; j < 2; j++) + gomp_map_vars_async (acc_dev, aq, + (j == 0 || pointer == 2) ? 1 : 2, + &hostaddrs[i + j], NULL, + &sizes[i + j], &kinds[i + j], true, + GOMP_MAP_VARS_OPENACC_ENTER_DATA); + } /* Increment 'i' by two because OpenACC requires fortran arrays to be contiguous, so each PSET is associated with one of MAP_FORCE_ALLOC/MAP_FORCE_PRESET/MAP_FORCE_TO, and @@ -930,8 +906,7 @@ GOACC_enter_exit_data (int device, size_t mapnum, } else { - /* This loop only handles explicit "detach" clauses that are not an - implicit part of a copy{,in,out}, etc. mapping. */ + /* Handle "detach" before copyback/deletion of mapped data. */ for (i = 0; i < mapnum; i++) { unsigned char kind = kinds[i] & 0xff; @@ -948,7 +923,16 @@ GOACC_enter_exit_data (int device, size_t mapnum, i += sizes[i]; } else - i += pointer - 1; + { + unsigned char kind2 = kinds[i + pointer - 1] & 0xff; + + if (kind2 == GOMP_MAP_DETACH) + acc_detach (hostaddrs[i + pointer - 1]); + else if (kind2 == GOMP_MAP_FORCE_DETACH) + acc_detach_finalize (hostaddrs[i + pointer - 1]); + + i += pointer - 1; + } } for (i = 0; i < mapnum; ++i) @@ -985,19 +969,39 @@ GOACC_enter_exit_data (int device, size_t mapnum, case GOMP_MAP_STRUCT: { int elems = sizes[i]; - struct splay_tree_key_s k; - splay_tree_key str; - uintptr_t elems_lo = (uintptr_t) hostaddrs[i + 1]; - uintptr_t elems_hi = (uintptr_t) hostaddrs[i + elems] - + sizes[i + elems]; - k.host_start = elems_lo; - k.host_end = elems_hi; - gomp_mutex_lock (&acc_dev->lock); - str = splay_tree_lookup (&acc_dev->mem_map, &k); - gomp_mutex_unlock (&acc_dev->lock); - if (str == NULL) - gomp_fatal ("[%p,%ld] is not mapped", (void *) elems_lo, - (unsigned long) (elems_hi - elems_lo)); + goacc_aq aq = get_goacc_asyncqueue (async); + for (int j = 1; j <= elems; j++) + { + struct splay_tree_key_s k; + k.host_start = (uintptr_t) hostaddrs[i + j]; + k.host_end = k.host_start + sizes[i + j]; + splay_tree_key str; + gomp_mutex_lock (&acc_dev->lock); + str = splay_tree_lookup (&acc_dev->mem_map, &k); + gomp_mutex_unlock (&acc_dev->lock); + if (str) + { + if (finalize) + { + str->refcount -= str->virtual_refcount; + str->virtual_refcount = 0; + } + if (str->virtual_refcount > 0) + { + str->refcount--; + str->virtual_refcount--; + } + else if (str->refcount > 0) + str->refcount--; + if (str->refcount == 0) + { + if (aq) + goacc_remove_var_async (acc_dev, str, aq); + else + gomp_remove_var (acc_dev, str); + } + } + } i += elems; } break; @@ -1012,17 +1016,8 @@ GOACC_enter_exit_data (int device, size_t mapnum, gomp_acc_declare_allocate (false, pointer, &hostaddrs[i], &sizes[i], &kinds[i]); else - { - unsigned short ptrkind = kinds[i + pointer - 1] & 0xff; - bool detach = (ptrkind == GOMP_MAP_DETACH - || ptrkind == GOMP_MAP_FORCE_DETACH); - void *detach_from = detach ? hostaddrs[i + pointer - 1] - : NULL; - gomp_acc_remove_pointer (&hostaddrs[i], &sizes[i], &kinds[i], - async, detach_from, finalize, - pointer); - /* See the above comment. */ - } + gomp_acc_remove_pointer (acc_dev, &hostaddrs[i], &sizes[i], + &kinds[i], async, finalize, pointer); i += pointer - 1; } } diff --git a/libgomp/target.c b/libgomp/target.c index bb5e1e9..91139a6 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -374,7 +374,7 @@ gomp_map_vars_existing (struct gomp_device_descr *devicep, tgt_var->key = oldn; tgt_var->copy_from = GOMP_MAP_COPY_FROM_P (kind); tgt_var->always_copy_from = GOMP_MAP_ALWAYS_FROM_P (kind); - tgt_var->do_detach = false; + tgt_var->do_detach = kind == GOMP_MAP_ATTACH; tgt_var->offset = newn->host_start - oldn->host_start; tgt_var->length = newn->host_end - newn->host_start; @@ -841,8 +841,9 @@ gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum, attribute_hidden struct target_mem_desc * gomp_map_vars_async (struct gomp_device_descr *devicep, struct goacc_asyncqueue *aq, size_t mapnum, - void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds, - bool short_mapkind, enum gomp_map_vars_kind pragma_kind) + void **hostaddrs, void **devaddrs, size_t *sizes, + void *kinds, bool short_mapkind, + enum gomp_map_vars_kind pragma_kind) { size_t i, tgt_align, tgt_size, not_found_cnt = 0; bool has_firstprivate = false; @@ -873,7 +874,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, tgt = gomp_malloc (sizeof (*tgt) + sizeof (tgt->list[0]) * (mapnum + da_data_row_num)); tgt->list_count = mapnum + da_data_row_num; - tgt->refcount = pragma_kind == GOMP_MAP_VARS_ENTER_DATA ? 0 : 1; + tgt->refcount = (pragma_kind == GOMP_MAP_VARS_ENTER_DATA + || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) ? 0 : 1; tgt->device_descr = devicep; struct gomp_coalesce_buf cbuf, *cbufp = NULL; @@ -1307,6 +1309,10 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, { tgt->list[i].key = &array->key; tgt->list[i].key->tgt = tgt; + tgt->list[i].key->refcount = REFCOUNT_INFINITY; + tgt->list[i].key->virtual_refcount = 0; + tgt->list[i].key->attach_count = NULL; + tgt->list[i].key->link_key = NULL; array++; continue; } @@ -1356,7 +1362,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, tgt->list[i].offset = 0; tgt->list[i].length = k->host_end - k->host_start; k->refcount = 1; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; k->attach_count = NULL; tgt->refcount++; array->left = NULL; @@ -1528,7 +1534,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, k->tgt = tgt; k->refcount = 1; - k->dynamic_refcount = 0; + k->virtual_refcount = 0; k->attach_count = NULL; k->link_key = NULL; tgt_size = (tgt_size + align - 1) & ~(align - 1); @@ -1611,8 +1617,20 @@ gomp_map_vars_async (struct gomp_device_descr *devicep, /* If the variable from "omp target enter data" map-list was already mapped, tgt is not needed. Otherwise tgt will be freed by gomp_unmap_vars or gomp_exit_data. */ - if (pragma_kind == GOMP_MAP_VARS_ENTER_DATA && tgt->refcount == 0) - { + if ((pragma_kind == GOMP_MAP_VARS_ENTER_DATA + || pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) + && tgt->refcount == 0) + { + /* If we're about to discard a target_mem_desc with no "structural" + references (tgt->refcount == 0), any splay keys linked in the tgt's + list must have their virtual refcount incremented to represent that + "lost" reference in order to implement the semantics of the OpenACC + "present increment" operation properly. */ + if (pragma_kind == GOMP_MAP_VARS_OPENACC_ENTER_DATA) + for (i = 0; i < tgt->list_count; i++) + if (tgt->list[i].key) + tgt->list[i].key->virtual_refcount++; + free (tgt); tgt = NULL; } @@ -1628,8 +1646,6 @@ gomp_unmap_tgt (struct target_mem_desc *tgt) if (tgt->tgt_end) gomp_free_device_memory (tgt->device_descr, tgt->to_free); - gomp_acc_data_env_remove_tgt (&tgt->device_descr->openacc.data_environ, tgt); - free (tgt->array); free (tgt); } @@ -1641,6 +1657,8 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) splay_tree_remove (&devicep->mem_map, k); if (k->link_key) splay_tree_insert (&devicep->mem_map, (splay_tree_node) k->link_key); + if (k->attach_count) + free (k->attach_count); if (k->tgt->refcount > 1) k->tgt->refcount--; else @@ -1648,8 +1666,6 @@ gomp_remove_var (struct gomp_device_descr *devicep, splay_tree_key k) is_tgt_unmapped = true; gomp_unmap_tgt (k->tgt); } - if (k->attach_count) - free (k->attach_count); return is_tgt_unmapped; } @@ -1706,7 +1722,14 @@ gomp_unmap_vars_async (struct target_mem_desc *tgt, bool do_copyfrom, continue; bool do_unmap = false; - if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) + if (k->tgt == tgt + && k->virtual_refcount > 0 + && k->refcount != REFCOUNT_INFINITY) + { + k->virtual_refcount--; + k->refcount--; + } + else if (k->refcount > 1 && k->refcount != REFCOUNT_INFINITY) k->refcount--; else if (k->refcount == 1) { @@ -1830,17 +1853,14 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, } /* Insert host-target address mapping into splay tree. */ - struct target_mem_desc *tgt = - gomp_malloc (sizeof (*tgt) - + sizeof (tgt->list[0]) - * (num_funcs + num_vars) * sizeof (*tgt->array)); + struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array)); tgt->refcount = REFCOUNT_INFINITY; tgt->tgt_start = 0; tgt->tgt_end = 0; tgt->to_free = NULL; tgt->prev = NULL; - tgt->list_count = num_funcs + num_vars; + tgt->list_count = 0; tgt->device_descr = devicep; splay_tree_node array = tgt->array; @@ -1852,10 +1872,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_table[i].start; k->refcount = REFCOUNT_INFINITY; + k->virtual_refcount = 0; k->attach_count = NULL; k->link_key = NULL; - tgt->list[i].key = k; - tgt->refcount++; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -1887,10 +1906,9 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, k->tgt = tgt; k->tgt_offset = target_var->start; k->refcount = target_size & link_bit ? REFCOUNT_LINK : REFCOUNT_INFINITY; + k->virtual_refcount = 0; k->attach_count = NULL; k->link_key = NULL; - tgt->list[i].key = k; - tgt->refcount++; array->left = NULL; array->right = NULL; splay_tree_insert (&devicep->mem_map, array); @@ -3604,7 +3622,6 @@ gomp_target_init (void) current_device.type = current_device.get_type_func (); current_device.mem_map.root = NULL; current_device.state = GOMP_DEVICE_UNINITIALIZED; - current_device.openacc.data_environ = NULL; /* Augment DEVICES and NUM_DEVICES. */ devices = gomp_realloc (devices, diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c index 3a970a0..a59047a 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-7.c @@ -20,16 +20,19 @@ main () for (k = 0; k < 16; k++) { + /* Here, we do not explicitly copy the enclosing structure, but work + with fields directly. Make sure attachment counters and reference + counters work properly in that case. */ #pragma acc enter data copyin(v.a, v.b[0:n]) - +#pragma acc enter data pcopyin(v.b[0:n]) #pragma acc enter data pcopyin(v.b[0:n]) -#pragma acc parallel loop attach(v.b) +#pragma acc parallel loop present(v.a, v.b) for (i = 0; i < n; i++) v.b[i] = v.a + i; -#pragma acc exit data copyout(v.b[:n]) -#pragma acc exit data delete(v) finalize +#pragma acc exit data copyout(v.b[:n]) finalize +#pragma acc exit data delete(v.a) for (i = 0; i < n; i++) assert (v.b[i] == v.a + i); diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c index 54f553b..0ca5990 100644 --- a/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/deep-copy-8.c @@ -35,6 +35,7 @@ main () #pragma acc exit data copyout(v.b[:n]) #pragma acc exit data copyout(v.c[:n]) #pragma acc exit data copyout(v.d[:n]) +#pragma acc exit data copyout(v.a) for (i = 0; i < n; i++) assert (v.b[i] == v.a + i);