As mentioned in the parent email, this is a fix for PR95590 that detects updates of attached pointers in blocks, and rewrites the attached pointer and resets its attachment counter appropriately. I am however not entirely sure this is desirable or required by the spec: points against are:
- To avoid expensive copies from the device to the host and/or "wrong way" device-to-host splay tree lookups, it requires keeping an extra shadow copy of mapped blocks on the host in order to detect if a host pointer with attachments in the block has been changed between attach operations. We incur this overhead unconditionally if attach/detach are in use for what's not likely to be a common use case (it's slightly tricky to write a test case to exercise the behaviour, even -- Thomas's unmodified original for the PR raises an error after the previous patch in this series). - From a user perspective, I think it's going to be quite easy to get confused wrt. the hidden attachment counter state, with this kind of reset-on-host-pointer-modification behaviour. Mind you, silently *not* doing the update is likewise going to be confusing (the stale device pointer would be updated at present). Maybe this should be detected as an error instead? - The text in "2.6.8. Attachment Counter" *might* contribute to the argument that this kind of pointer-update detection is not required. Anyway, thoughts, or OK for mainline? Thanks, Julian ChangeLog PR libgomp/95590 libgomp/ * target.c (gomp_attach_pointer): Initialise shadow copy of block with attached pointers, and use to detect modifications of those pointers. * testsuite/libgomp.oacc-c-c++-common/attach-ptr-change-1.c: New test. --- libgomp/target.c | 29 +++++++- .../attach-ptr-change-1.c | 74 +++++++++++++++++++ 2 files changed, 100 insertions(+), 3 deletions(-) create mode 100644 libgomp/testsuite/libgomp.oacc-c-c++-common/attach-ptr-change-1.c diff --git a/libgomp/target.c b/libgomp/target.c index db6f56a8ff8..076cc2bbbcb 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -691,6 +691,8 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, { struct splay_tree_key_s s; size_t size, idx; + char *shadow_block; + size_t shadow_size = n->host_end - n->host_start; if (n == NULL) { @@ -707,9 +709,31 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, if (!n->aux) n->aux = gomp_malloc_cleared (sizeof (struct splay_tree_aux)); + bool first = false; + if (!n->aux->attach_count) - n->aux->attach_count - = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size); + { + n->aux->attach_count + = gomp_malloc_cleared (sizeof (*n->aux->attach_count) * size + + shadow_size); + first = true; + } + + shadow_block = ((char *) n->aux->attach_count) + + sizeof (*n->aux->attach_count) * size; + + if (first) + memcpy (shadow_block, (const void *) n->host_start, shadow_size); + + uintptr_t target = (uintptr_t) *(void **) attach_to; + uintptr_t shadow_target + = (uintptr_t) *(void **) (shadow_block + attach_to - n->host_start); + if (target != shadow_target) + { + n->aux->attach_count[idx] = 0; + memcpy ((char *) shadow_block + attach_to - n->host_start, + (const void *) target, sizeof (void *)); + } if (n->aux->attach_count[idx] < UINTPTR_MAX) n->aux->attach_count[idx]++; @@ -723,7 +747,6 @@ gomp_attach_pointer (struct gomp_device_descr *devicep, { uintptr_t devptr = n->tgt->tgt_start + n->tgt_offset + attach_to - n->host_start; - uintptr_t target = (uintptr_t) *(void **) attach_to; splay_tree_key tn; uintptr_t data; diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/attach-ptr-change-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/attach-ptr-change-1.c new file mode 100644 index 00000000000..d4d84fdb092 --- /dev/null +++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/attach-ptr-change-1.c @@ -0,0 +1,74 @@ +#include <assert.h> +#include <stdlib.h> +#include <openacc.h> + +struct str { + unsigned char *c; +}; + +int main() +{ + const int size_1 = sizeof (void *); + unsigned char *data_1 = (unsigned char *) malloc(sizeof (void *)); + assert(data_1); + void *data_1_d = acc_create(data_1, size_1); + assert(data_1_d); + assert(acc_is_present(data_1, size_1)); + + const int size_2 = sizeof (void *); + unsigned char *data_2 = (unsigned char *) malloc(size_2); + assert(data_2); + void *data_2_d = acc_create(data_2, size_2); + assert(data_2_d); + assert(acc_is_present(data_2, size_2)); + + struct str data_work; + data_work.c = data_1; + + acc_copyin(&data_work, sizeof data_work); + assert(acc_is_present(&data_work, sizeof data_work)); + assert(data_work.c == data_1); + + /* No attach has taken place so far. We can still do a self-update. */ + acc_update_self(&data_work, sizeof data_work); + assert(data_work.c == data_1); + + data_1[0] = 'a'; + data_2[0] = 'b'; + + acc_update_device (data_1, size_1); + acc_update_device (data_2, size_2); + + acc_attach((void **) &data_work.c); + #pragma acc serial /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ + { + data_work.c[0] = 'c'; + } + + acc_update_self (data_1, size_1); + acc_update_self (data_2, size_2); + + assert (data_1[0] == 'c'); + assert (data_2[0] == 'b'); + + data_1[0] = 'a'; + data_2[0] = 'b'; + + acc_update_device (data_1, size_1); + acc_update_device (data_2, size_2); + + data_work.c = data_2; + acc_attach((void **) &data_work.c); + #pragma acc serial /* { dg-warning "using vector_length \\(32\\), ignoring 1" "" { target openacc_nvidia_accel_selected } } */ + { + data_work.c[0] = 'd'; + } + + acc_update_self (data_1, size_1); + acc_update_self (data_2, size_2); + + assert (data_1[0] == 'a'); + assert (data_2[0] == 'd'); + + return 0; +} -- 2.23.0