MI210 is xnack disabled by default. Compile with :xnack+ will produce
GPU kernels that cannot be run on MI210.
Two targets, gfx90a_xnack and gfx90a_noxnack are introduced to replace
gfx90a, for the following reason:
Upstream usually ships 2 version: gfx90a:xnack-, gfx90a:xnack+. Although
a single gfx90a should have maximum compatibility, According to [1,2],
compile with xnack+/xnack- may have better performance on xnack
enabled/disabled GPUs. Therefore we ship both the target, align with
upstream. gfx900 is also appended with :xnack- to align with upstream
default.
However, some ROCm libraries does not need xnack feature flag, such as
sci-libs/miopen and sci-libs/rocFFT, so "--no-xnack-flag" option is
introduced to stop appending xnack feature. Demonstrate this option in
the second example ebuild.
Update description file profiles/desc/amdgpu_targets.desc accordingly.
Github PR: https://github.com/gentoo/gentoo/pull/33861
[1] https://llvm.org/docs/AMDGPUUsage.html#target-features
[2]
https://docs.olcf.ornl.gov/systems/crusher_quick_start_guide.html#compiling-hip-kernels-for-specific-xnack-modes
Signed-off-by: Yiyang Wu
---
eclass/rocm.eclass| 31 ++-
profiles/desc/amdgpu_targets.desc | 5 -
2 files changed, 26 insertions(+), 10 deletions(-)
diff --git a/eclass/rocm.eclass b/eclass/rocm.eclass
index b78dfea1cc31..3209c003e323 100644
--- a/eclass/rocm.eclass
+++ b/eclass/rocm.eclass
@@ -60,7 +60,8 @@
#
# Examples for packages depend on ROCm libraries -- a package which depends on
# rocBLAS, uses comma separated ${HCC_AMDGPU_TARGET} to determine GPU
-# architectures, and requires ROCm version >=5.1
+# architectures which does not accept :xnack feature flag, and requires ROCm
+# version >=5.1
# @CODE
# ROCM_VERSION=5.1
# inherit rocm
@@ -71,7 +72,7 @@
#
# src_configure() {
# if use rocm; then
-# local amdgpu_flags=$(get_amdgpu_flags)
+# local amdgpu_flags=$(get_amdgpu_flags --no-xnack-flag)
# export HCC_AMDGPU_TARGET=${amdgpu_flags//;/,}
# fi
# default
@@ -143,7 +144,7 @@ _rocm_set_globals() {
gfx803 gfx900 gfx1010 gfx1011 gfx1012 gfx1031
)
official_amdgpu_targets=(
- gfx906 gfx908 gfx90a gfx1030
+ gfx906 gfx908 gfx90a_xnack gfx90a_noxnack
gfx1030
)
;;
5.*|)
@@ -152,7 +153,7 @@ _rocm_set_globals() {
gfx1031 gfx1100 gfx1101 gfx1102
)
official_amdgpu_targets=(
- gfx906 gfx908 gfx90a gfx1030
+ gfx906 gfx908 gfx90a_xnack gfx90a_noxnack
gfx1030
)
;;
*)
@@ -181,26 +182,38 @@ unset -f _rocm_set_globals
# @FUNCTION: get_amdgpu_flags
-# @USAGE: get_amdgpu_flags
+# @USAGE: get_amdgpu_flags [--no-xnack-flag]
# @DESCRIPTION:
# Convert specified use flag of amdgpu_targets to compilation flags.
-# Append default target feature to GPU arch. See
+# Append default target feature xnack to GPU arch. See
# https://llvm.org/docs/AMDGPUUsage.html#target-features
+# If specified with --no-xnack-flag, do not append xnack feature flag.
get_amdgpu_flags() {
local amdgpu_target_flags
for gpu_target in ${AMDGPU_TARGETS}; do
local target_feature=
case ${gpu_target} in
- gfx906|gfx908)
+ gfx900|gfx906|gfx908)
+ # These GPUs ususally does not enable xnack, so
+ # disabling xnack generates faster GPU kernels.
target_feature=:xnack-
;;
- gfx90a)
+ gfx90a_noxnack)
+ gpu_target=gfx90a
+ target_feature=:xnack-
+ ;;
+ gfx90a_xnack)
+ gpu_target=gfx90a
target_feature=:xnack+
;;
*)
;;
esac
- amdgpu_target_flags+="${gpu_target}${target_feature};"
+ if [[ "$1" == "--no-xnack-flag" ]]; then
+ amdgpu_target_flags+="${gpu_target};"
+ else
+ amdgpu_target_flags+="${gpu_target}${target_feature};"
+ fi
done
echo "${amdgpu_target_flags}"
}
diff --git a/profiles/desc/amdgpu_targets.desc
b/profiles/desc/amdgpu_targets.desc
index 9c5739e9d9a4..77ffa43399f1 100644
--- a/profiles/desc/amdgpu_targets.desc
+++ b/profiles/desc/amdgpu_targets.desc
@@ -4,12 +4,15 @@
# Reference:
# GPU name and Architecture cod