Hi Andrew!
On 2022-03-08T11:30:57+0000, Hafiz Abid Qadeer <[email protected]> wrote:
> From: Andrew Stubbs <[email protected]>
>
> This adds support for using Cuda Managed Memory with omp_alloc. It will be
> used as the underpinnings for "requires unified_shared_memory" in a later
> patch.
>
> There are two new predefined allocators, ompx_unified_shared_mem_alloc and
> ompx_host_mem_alloc, plus corresponding memory spaces, [...]
> --- a/libgomp/config/linux/allocator.c
> +++ b/libgomp/config/linux/allocator.c
> @@ -42,9 +42,11 @@
> static void *
> linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
> {
> - (void)memspace;
> -
> - if (pin)
> + if (memspace == ompx_unified_shared_mem_space)
> + {
> + return gomp_usm_alloc (size, GOMP_DEVICE_ICV);
> + }
> + else if (pin)
> {
> void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
> MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
This I understand conceptually, but then:
> @@ -67,7 +69,14 @@ linux_memspace_alloc (omp_memspace_handle_t memspace,
> size_t size, int pin)
> static void *
> linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
> {
> - if (pin)
> + if (memspace == ompx_unified_shared_mem_space)
> + {
> + void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
> + memset (ret, 0, size);
> + return ret;
> + }
> + else if (memspace == ompx_unified_shared_mem_space
> + || pin)
> return linux_memspace_alloc (memspace, size, pin);
> else
> return calloc (1, size);
..., here, we've got a duplicated expression
'memspace == ompx_unified_shared_mem_space', which is always false in the
'else if' because the preceding 'if' has already handled that case.
('-Wduplicated-cond' fails to report this; see <https://gcc.gnu.org/PR108753>
"'-Wduplicated-cond' doesn't diagnose duplicated subexpressions".)
Is the correct fix the following (conceptually like
'linux_memspace_alloc' cited above), or is there something that I fail to
understand?
static void *
linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int
pin)
{
if (memspace == ompx_unified_shared_mem_space)
{
void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
memset (ret, 0, size);
return ret;
}
- else if (memspace == ompx_unified_shared_mem_space
- || pin)
+ else if (pin)
return linux_memspace_alloc (memspace, size, pin);
else
return calloc (1, size);
The following ones are then, again, conceptually like
'linux_memspace_alloc' cited above:
> @@ -77,9 +86,9 @@ static void
> linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
> int pin)
> {
> - (void)memspace;
> -
> - if (pin)
> + if (memspace == ompx_unified_shared_mem_space)
> + gomp_usm_free (addr, GOMP_DEVICE_ICV);
> + else if (pin)
> munmap (addr, size);
> else
> free (addr);
> @@ -89,7 +98,9 @@ static void *
> linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
> size_t oldsize, size_t size, int oldpin, int pin)
> {
> - if (oldpin && pin)
> + if (memspace == ompx_unified_shared_mem_space)
> + goto manual_realloc;
> + else if (oldpin && pin)
> {
> void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
> if (newaddr == MAP_FAILED)
> @@ -98,18 +109,19 @@ linux_memspace_realloc (omp_memspace_handle_t memspace,
> void *addr,
> [...]
..., and similarly those here:
> --- a/libgomp/config/nvptx/allocator.c
> +++ b/libgomp/config/nvptx/allocator.c
> @@ -125,6 +125,8 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace,
> size_t size)
> __atomic_store_n (&__nvptx_lowlat_heap_root, root.raw,
> MEMMODEL_RELEASE);
> return result;
> }
> + else if (memspace == ompx_host_mem_space)
> + return NULL;
> else
> return malloc (size);
> }
> @@ -145,6 +147,8 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace,
> size_t size)
>
> return result;
> }
> + else if (memspace == ompx_host_mem_space)
> + return NULL;
> else
> return calloc (1, size);
> }
> @@ -354,6 +358,8 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace,
> void *addr,
> }
> return result;
> }
> + else if (memspace == ompx_host_mem_space)
> + return NULL;
> else
> return realloc (addr, size);
> }
(I'd have added an explicit no-op (or 'abort'?) to
'nvptx_memspace_free', but maybe that's just me...) ;-\
> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h
> +extern void * gomp_usm_alloc (size_t size, int device_num);
> +extern void gomp_usm_free (void *device_ptr, int device_num);
> +extern bool gomp_is_usm_ptr (void *ptr);
'gomp_is_usm_ptr' isn't defined/used anywhere; I'll remove it.
> --- a/libgomp/target.c
> +++ b/libgomp/target.c
> @@ -3740,6 +3807,9 @@ gomp_load_plugin_for_device (struct gomp_device_descr
> *device,
> DLSYM (unload_image);
> DLSYM (alloc);
> DLSYM (free);
> + DLSYM_OPT (usm_alloc, usm_alloc);
> + DLSYM_OPT (usm_free, usm_free);
> + DLSYM_OPT (is_usm_ptr, is_usm_ptr);
> DLSYM (dev2host);
> DLSYM (host2dev);
As a sanity check, shouldn't we check that either none or all three of
those are defined, like in the 'if (cuda && cuda != 4) { [error] }' check
a bit further down?
Note that these remarks likewise apply to the current upstream
submission:
<https://inbox.sourceware.org/gcc-patches/ef374d055251b2bc65b97d7e54a0a72d811b869d.1657188329.git....@codesourcery.com>
"openmp, nvptx: ompx_unified_shared_mem_alloc".
Grüße
Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht
München, HRB 106955