[og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update)

2023-02-16 Thread Thomas Schwinge
Hi!

On 2022-10-24T17:26:44+0100, Andrew Stubbs  wrote:
> I've committed this patch to the devel/omp/gcc-12 branch.

> --- a/libgomp/testsuite/libgomp.c/usm-1.c
> +++ b/libgomp/testsuite/libgomp.c/usm-1.c

> --- a/libgomp/testsuite/libgomp.c/usm-2.c
> +++ b/libgomp/testsuite/libgomp.c/usm-2.c

> --- a/libgomp/testsuite/libgomp.c/usm-3.c
> +++ b/libgomp/testsuite/libgomp.c/usm-3.c

> --- a/libgomp/testsuite/libgomp.c/usm-4.c
> +++ b/libgomp/testsuite/libgomp.c/usm-4.c

> @@ -1,5 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target omp_usm } */
> +/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target 
> offload_target_amdgcn } } */

I've pushed to devel/omp/gcc-12 branch
commit b4d4603df3fed290ccf721899be6bc69f037fe2b
"'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation",
see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
From b4d4603df3fed290ccf721899be6bc69f037fe2b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Tue, 14 Feb 2023 18:57:04 +0100
Subject: [PATCH] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading
 compilation

Change '-foffload=amdgcn-amdhsa=[...]' to
'-foffload-options=amdgcn-amdhsa=[...]', so that non-GCN offloading compilation
doesn't get disabled.

Fix-up for og12 commit 6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
"amdgcn, libgomp: USM allocation update".

	libgomp/
	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
	compilation.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.
---
 libgomp/ChangeLog.omp   | 8 
 libgomp/testsuite/libgomp.c/usm-1.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-2.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-3.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-4.c | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 2a20516cd09..ecc14b4f537 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2023-02-16  Thomas Schwinge  
+
+	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
+	compilation.
+	* testsuite/libgomp.c/usm-2.c: Likewise.
+	* testsuite/libgomp.c/usm-3.c: Likewise.
+	* testsuite/libgomp.c/usm-4.c: Likewise.
+
 2023-02-16  Tobias Burnus  
 
 	Backported from master:
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index f7bf897b839..35f37de7542 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 3f52adbd7e1..783075edb54 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 225cba5fe58..733f0f34090 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index d4addfc587a..5bf99df3b24 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include 
 #include 
-- 
2.25.1



[OG12 commit] amdgcn, libgomp: USM allocation update

2022-10-24 Thread Andrew Stubbs
I've committed this patch to the devel/omp/gcc-12 branch. I will have to 
fold it into my previous OpenMP memory management patch series when I 
repost it.


The patch changes the internal memory allocation method such that memory 
is allocated in the regular heap and then marked as "coarse-grained", as 
opposed to allocating coarse-grained memory in the first place. The 
difference is that this is CPU first, not GPU first, which is typically 
the right way around, especially when we are using this for all possible 
allocations.


Andrew

amdgcn, libgomp: USM allocation update

Allocate Unified Shared Memory via malloc and hsa_amd_svm_attributes_set,
instead of hsa_allocate_memory.  This scheme should be more efficient for
memory that is first accessed by the CPU.

libgomp/ChangeLog:

* plugin/plugin-gcn.c (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED): New.
(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): New.
(HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG): New.
(HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED): New.
(hsa_amd_svm_attribute_pair_t): New.
(struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
(dump_hsa_system_info): Dump HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT.
(DLSYM_OPT_FN): New.
(init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_alloc): Use malloc and hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_free): Use regular free.
* testsuite/libgomp.c/usm-1.c: Add -mxnack=on for amdgcn.
* testsuite/libgomp.c/usm-2.c: Likewise.
* testsuite/libgomp.c/usm-3.c: Likewise.
* testsuite/libgomp.c/usm-4.c: Likewise.

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index dd493f63912..4871a6a793b 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -113,6 +113,16 @@ struct gcn_thread
   int async;
 };
 
+/* TEMPORARY IMPORT, UNTIL hsa_ext_amd.h GETS UPDATED.  */
+const static int HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201;
+const static int HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202;
+const static int HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0;
+const static int HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1;
+typedef struct hsa_amd_svm_attribute_pair_s {
+  uint64_t attribute;
+  uint64_t value;
+} hsa_amd_svm_attribute_pair_t;
+
 /* As an HSA runtime is dlopened, following structure defines function
pointers utilized by the HSA plug-in.  */
 
@@ -195,6 +205,9 @@ struct hsa_runtime_fn_info
   hsa_status_t (*hsa_code_object_deserialize_fn)
 (void *serialized_code_object, size_t serialized_code_object_size,
  const char *options, hsa_code_object_t *code_object);
+  hsa_status_t (*hsa_amd_svm_attributes_set_fn)
+(void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
+ size_t attribute_count);
 };
 
 /* Structure describing the run-time and grid properties of an HSA kernel
@@ -720,6 +733,24 @@ dump_hsa_system_info (void)
 }
   else
 GCN_WARNING ("HSA_SYSTEM_INFO_EXTENSIONS: FAILED\n");
+
+  bool svm_supported;
+  status = hsa_fns.hsa_system_get_info_fn
+(HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED, &svm_supported);
+  if (status == HSA_STATUS_SUCCESS)
+GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: %s\n",
+  (svm_supported ? "TRUE" : "FALSE"));
+  else
+GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: FAILED\n");
+
+  bool svm_accessible;
+  status = hsa_fns.hsa_system_get_info_fn
+(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+  if (status == HSA_STATUS_SUCCESS)
+GCN_DEBUG ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: %s\n",
+  (svm_accessible ? "TRUE" : "FALSE"));
+  else
+GCN_WARNING ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: FAILED\n");
 }
 
 /* Dump information about the available hardware.  */
@@ -1361,6 +1392,8 @@ init_hsa_runtime_functions (void)
   hsa_fns.function##_fn = dlsym (handle, #function); \
   if (hsa_fns.function##_fn == NULL) \
 return false;
+#define DLSYM_OPT_FN(function) \
+  hsa_fns.function##_fn = dlsym (handle, #function);
   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
   if (handle == NULL)
 return false;
@@ -1395,6 +1428,7 @@ init_hsa_runtime_functions (void)
   DLSYM_FN (hsa_signal_load_acquire)
   DLSYM_FN (hsa_queue_destroy)
   DLSYM_FN (hsa_code_object_deserialize)
+  DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
   return true;
 #undef DLSYM_FN
 }
@@ -3886,15 +3920,38 @@ static struct usm_splay_tree_s usm_map = { NULL };
 
 /* Allocate memory suitable for Unified Shared Memory.
 
-   In fact, AMD memory need only be "coarse grained", which target
-   allocations already are.  We do need to track allocations so that
-   GOMP_OFFLOAD_is_usm_ptr can look them up.  */
+   Normal heap memory is already enabled for USM, but by default it is "fine-
+   grained" memory, meaning that the GPU must access it via the system bus,
+   slowly.  Changing the page to "coarse-