Tobias Burnus wrote:
Andrew Stubbs wrote:
On 07/10/2025 11:31, Tobias Burnus wrote:
This patch adds the currently unused static function
'is_integrated_apu'
to libgomp/plugin/plugin-{gcn,nvptx}.c.
While currently not in use ('#if 0'), I'd like to add it already now
as prep work. The idea is to use it to enable self mapping
automatically
by default if mapping is pointless (copying data despite sharing the
same memory controller). [See below for more.]
If going this far, why not make it live? I already posted the libgomp
parts a year ago, and there's not much:
https://patchwork.sourceware.org/project/gcc/patch/[email protected]/
I have now committed (r16-4354-gd2ad7e90834d9c) a slightly
modified version of the patch (as attached):
* I removed the comment about per-device (vs. per-device type)
in light of the patch Andrew mentioned.
* I added a reference to https://gcc.gnu.org/PR115279 as that's
required before we can automatically enable USM for APUs.
Without that fix, map clauses for global variables ('declare target' variables)
are ignored, which leads to wrong code.
This patch is also a prerequisite for the auto-USM part of Andrew's
patch (unless the code has no 'declare target' for variables).
Tobias
commit d2ad7e90834d9c18e0c7104796f3131594e7bbfb
Author: Tobias Burnus <[email protected]>
Date: Fri Oct 10 09:48:37 2025 +0200
libgomp: Add is_integrated_apu function to plugin/plugin-{gcn,nvptx}.c
The added function is currently '#if 0' but is planned to be used to enable
self mapping automatically. Prerequisite for auto self maps is still mapping
'declare target' variables (if any, in libgomp) or converting all
'declare target' variables to 'declare target link' in the compiler
(as required for 'omp requires self_maps').
include/ChangeLog:
* hsa_ext_amd.h (enum hsa_amd_agent_info_s): Add
HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES.
(enum): Add HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU.
libgomp/ChangeLog:
* plugin/plugin-gcn.c (is_integrated_apu): New; currently '#if 0'.
* plugin/plugin-nvptx.c (is_integrated_apu): Likewise.
---
include/hsa_ext_amd.h | 10 +++++++-
libgomp/plugin/plugin-gcn.c | 55 +++++++++++++++++++++++++++++++++++++++++++
libgomp/plugin/plugin-nvptx.c | 18 ++++++++++++++
3 files changed, 82 insertions(+), 1 deletion(-)
diff --git a/include/hsa_ext_amd.h b/include/hsa_ext_amd.h
index c1c16536621..e29e88090eb 100644
--- a/include/hsa_ext_amd.h
+++ b/include/hsa_ext_amd.h
@@ -168,9 +168,17 @@ typedef enum hsa_amd_agent_info_s {
* selective workarounds for hardware errata.
* The type of this attribute is uint32_t.
*/
- HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012
+ HSA_AMD_AGENT_INFO_ASIC_REVISION = 0xA012,
+
+ /* Bitmask with memory properties of the agent. */
+ HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES = 0xA114
} hsa_amd_agent_info_t;
+
+enum {
+ HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU = (1 << 0) /* NOTE(review): is_integrated_apu in plugin-gcn.c shifts by this value, i.e. uses it as a bit index, not as a mask. */
+};
+
typedef struct hsa_amd_hdp_flush_s {
uint32_t* HDP_MEM_FLUSH_CNTL;
uint32_t* HDP_REG_FLUSH_CNTL;
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index 18f01e09002..cd5a19b0355 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3331,6 +3331,61 @@ gcn_exec (struct kernel_info *kernel,
/* }}} */
/* {{{ Generic Plugin API */
+#if 0 /* TODO: Use to enable self-mapping/USM automatically. */
+/* FIXME: The auto-self-map feature depends on still mapping 'declare target'
+ variables, even if ignoring all other mappings. Cf. PR 115279. */
+
+/* Return TRUE if the GPU is an APU, i.e. the GPU is integrated with the CPU
+ so that both use the same memory controller and mapping or memory
+ migration is pointless. If CHECK_XNACK is TRUE, it additionally requires
+ that the GPU has *no* XNACK support; otherwise FALSE is returned.
+
+ In theory, enabling unified-shared memory for APUs should always work,
+ however, with AMD GPUs some APUs (e.g. MI300A) still require XNACK to be
+ enabled as it is required to handle page faults.
+
+ Thus, for unified-shared memory access, either of the following must hold:
+ * HSA_AMD_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT is TRUE
+ This implies that all GPUs support USM access, either directly (as APU)
+ or via page migration. For MI300A, this is only the case if
+ HSA_AMD_SYSTEM_INFO_XNACK_ENABLED is TRUE.
+ * The GPU is an APU *and* it does not support XNACK. */
+
+static bool
+is_integrated_apu (struct agent_info *agent, bool check_xnack)
+{
+ enum { /* Supplies HSACO_ATTR_UNSUPPORTED for the XNACK comparison below; presumably mirrors the names used in gcn-devices.def - confirm. */
+ HSACO_ATTR_UNSUPPORTED,
+ HSACO_ATTR_OFF,
+ HSACO_ATTR_ON,
+ HSACO_ATTR_ANY,
+ HSACO_ATTR_DEFAULT
+ };
+
+ bool is_apu;
+ uint8_t mem_prop[8]; /* Output buffer for the memory-properties query. */
+ hsa_status_t status;
+
+ status = hsa_fns.hsa_agent_get_info_fn (
+ agent->id, (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MEMORY_PROPERTIES,
+ mem_prop);
+ _Static_assert (HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8,
+ "HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU < 8"); /* Only mem_prop[0] is inspected, so the bit index must fit in one byte. */
+ is_apu = (status == HSA_STATUS_SUCCESS /* mem_prop is only read if the query succeeded. */
+ && (mem_prop[0] & (1 << HSA_AMD_MEMORY_PROPERTY_AGENT_IS_APU)));
+
+ if (check_xnack)
+ switch(agent->device_isa)
+ {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, ...) \
+ case ELF: return is_apu && (XNACK == HSACO_ATTR_UNSUPPORTED);
+#include "../../gcc/config/gcn/gcn-devices.def"
+ default: return false; /* Just to be safe. */
+ }
+ return is_apu;
+}
+#endif
+
/* Return the name of the accelerator, which is "gcn". */
const char *
diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c
index eb7b5e59d8f..92c62ee5b86 100644
--- a/libgomp/plugin/plugin-nvptx.c
+++ b/libgomp/plugin/plugin-nvptx.c
@@ -1246,6 +1246,24 @@ nvptx_get_current_cuda_context (void)
return nvthd->ptx_dev->ctx;
}
+#if 0 /* TODO: Use to enable self-mapping/USM automatically. */
+/* FIXME: The auto-self-map feature depends on still mapping 'declare target'
+ variables, even if ignoring all other mappings. Cf. PR 115279. */
+
+/* Return TRUE if the GPU is integrated with host memory, i.e. GPU and
+ host share the same memory controller. As of Oct 2025, no such
+ Nvidia GPU seems to exist. */
+static bool
+is_integrated_apu (struct ptx_device *ptx_dev)
+{
+ int pi; /* Receives the queried attribute value. */
+ CUresult r;
+ r = CUDA_CALL_NOCHECK (cuDeviceGetAttribute, &pi,
+ CU_DEVICE_ATTRIBUTE_INTEGRATED, ptx_dev->dev);
+ return (r == CUDA_SUCCESS && pi == 1); /* A failed query yields FALSE; pi is only read on success. */
+}
+#endif
+
/* Plugin entry points. */
const char *