Hi Andrew!

On 2022-03-08T11:30:57+0000, Hafiz Abid Qadeer <ab...@codesourcery.com> wrote:
> From: Andrew Stubbs <a...@codesourcery.com>
>
> This adds support for using Cuda Managed Memory with omp_alloc.  It will be
> used as the underpinnings for "requires unified_shared_memory" in a later
> patch.
>
> There are two new predefined allocators, ompx_unified_shared_mem_alloc and
> ompx_host_mem_alloc, plus corresponding memory spaces, [...]

> --- a/libgomp/config/linux/allocator.c
> +++ b/libgomp/config/linux/allocator.c
> @@ -42,9 +42,11 @@
>  static void *
>  linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
>  {
> -  (void)memspace;
> -
> -  if (pin)
> +  if (memspace == ompx_unified_shared_mem_space)
> +    {
> +      return gomp_usm_alloc (size, GOMP_DEVICE_ICV);
> +    }
> +  else if (pin)
>      {
>        void *addr = mmap (NULL, size, PROT_READ | PROT_WRITE,
>                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

This I understand conceptually, but then:

> @@ -67,7 +69,14 @@ linux_memspace_alloc (omp_memspace_handle_t memspace, size_t size, int pin)
>  static void *
>  linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
>  {
> -  if (pin)
> +  if (memspace == ompx_unified_shared_mem_space)
> +    {
> +      void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
> +      memset (ret, 0, size);
> +      return ret;
> +    }
> +  else if (memspace == ompx_unified_shared_mem_space
> +      || pin)
>      return linux_memspace_alloc (memspace, size, pin);
>    else
>      return calloc (1, size);

..., here, we've got a duplicated expression
'memspace == ompx_unified_shared_mem_space', which is always false in the
'else if' because the preceding 'if' already handled that case (and which
'-Wduplicated-cond' fails to report; see <https://gcc.gnu.org/PR108753>
"'-Wduplicated-cond' doesn't diagnose duplicated subexpressions").
Is the correct fix the following (conceptually like
'linux_memspace_alloc' cited above), or is there something that I fail to
understand?

     static void *
     linux_memspace_calloc (omp_memspace_handle_t memspace, size_t size, int pin)
     {
       if (memspace == ompx_unified_shared_mem_space)
         {
           void *ret = gomp_usm_alloc (size, GOMP_DEVICE_ICV);
           memset (ret, 0, size);
           return ret;
         }
    -  else if (memspace == ompx_unified_shared_mem_space
    -      || pin)
    +  else if (pin)
         return linux_memspace_alloc (memspace, size, pin);
       else
         return calloc (1, size);

The following ones then again are conceptually like
'linux_memspace_alloc' cited above:

> @@ -77,9 +86,9 @@ static void
>  linux_memspace_free (omp_memspace_handle_t memspace, void *addr, size_t size,
>                    int pin)
>  {
> -  (void)memspace;
> -
> -  if (pin)
> +  if (memspace == ompx_unified_shared_mem_space)
> +    gomp_usm_free (addr, GOMP_DEVICE_ICV);
> +  else if (pin)
>      munmap (addr, size);
>    else
>      free (addr);
> @@ -89,7 +98,9 @@ static void *
>  linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
>                       size_t oldsize, size_t size, int oldpin, int pin)
>  {
> -  if (oldpin && pin)
> +  if (memspace == ompx_unified_shared_mem_space)
> +    goto manual_realloc;
> +  else if (oldpin && pin)
>      {
>        void *newaddr = mremap (addr, oldsize, size, MREMAP_MAYMOVE);
>        if (newaddr == MAP_FAILED)
> @@ -98,18 +109,19 @@ linux_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
> [...]

..., and similarly those here:

> --- a/libgomp/config/nvptx/allocator.c
> +++ b/libgomp/config/nvptx/allocator.c
> @@ -125,6 +125,8 @@ nvptx_memspace_alloc (omp_memspace_handle_t memspace, size_t size)
>        __atomic_store_n (&__nvptx_lowlat_heap_root, root.raw, MEMMODEL_RELEASE);
>        return result;
>      }
> +  else if (memspace == ompx_host_mem_space)
> +    return NULL;
>    else
>      return malloc (size);
>  }
> @@ -145,6 +147,8 @@ nvptx_memspace_calloc (omp_memspace_handle_t memspace, size_t size)
>
>        return result;
>      }
> +  else if (memspace == ompx_host_mem_space)
> +    return NULL;
>    else
>      return calloc (1, size);
>  }
> @@ -354,6 +358,8 @@ nvptx_memspace_realloc (omp_memspace_handle_t memspace, void *addr,
>       }
>        return result;
>      }
> +  else if (memspace == ompx_host_mem_space)
> +    return NULL;
>    else
>      return realloc (addr, size);
>  }

(I'd have added an explicit no-op (or, 'abort'?) to
'nvptx_memspace_free', but that's maybe just me...)  ;-\


> --- a/libgomp/libgomp.h
> +++ b/libgomp/libgomp.h

> +extern void * gomp_usm_alloc (size_t size, int device_num);
> +extern void gomp_usm_free (void *device_ptr, int device_num);
> +extern bool gomp_is_usm_ptr (void *ptr);

'gomp_is_usm_ptr' isn't defined/used anywhere; I'll remove it.


> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -3740,6 +3807,9 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device,
>    DLSYM (unload_image);
>    DLSYM (alloc);
>    DLSYM (free);
> +  DLSYM_OPT (usm_alloc, usm_alloc);
> +  DLSYM_OPT (usm_free, usm_free);
> +  DLSYM_OPT (is_usm_ptr, is_usm_ptr);
>    DLSYM (dev2host);
>    DLSYM (host2dev);

As a sanity check, shouldn't we check that either none or all three of
those are defined, like in the 'if (cuda && cuda != 4) { [error] }' check
a bit further down?


Note that these remarks likewise apply to the current upstream
submission:
<https://inbox.sourceware.org/gcc-patches/ef374d055251b2bc65b97d7e54a0a72d811b869d.1657188329.git....@codesourcery.com>
"openmp, nvptx: ompx_unified_shared_mem_alloc".


Grüße
 Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955

Reply via email to