https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/175767
>From d530dd7d961f9c38abc286a8d95e7f46ed87e107 Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" <[email protected]> Date: Tue, 13 Jan 2026 09:35:20 -0500 Subject: [PATCH] [AMDGPU] Fix builtin crash with template parameter size argument __builtin_amdgcn_global_load_lds and related builtins (raw_ptr_buffer_load_lds, struct_ptr_buffer_load_lds, load_to_lds) crash when the size argument is instantiation-dependent (e.g., a template parameter or sizeof(T)) because the semantic checker calls VerifyIntegerConstantExpression without first checking if the expression is instantiation-dependent. This causes an assertion failure: "Expression evaluator can't be called on a dependent expression." The fix adds an isInstantiationDependent() check before evaluating the constant expression. Instantiation-dependent expressions are accepted during template definition and properly checked during template instantiation. Fixes a regression reported by the FBGEMM team when building with ROCm 7.2 for gfx950. --- clang/lib/Sema/SemaAMDGPU.cpp | 4 + .../amdgpu-global-load-lds-template.hip | 97 +++++++++++++++++++ 2 files changed, 101 insertions(+) create mode 100644 clang/test/SemaHIP/amdgpu-global-load-lds-template.hip diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 9d154c65c932e..b6eebf35296ef 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -44,6 +44,10 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, constexpr const int SizeIdx = 2; llvm::APSInt Size; Expr *ArgExpr = TheCall->getArg(SizeIdx); + // Check for instantiation-dependent expressions (e.g., involving template + // parameters). These will be checked again during template instantiation. + if (ArgExpr->isInstantiationDependent()) + return false; [[maybe_unused]] ExprResult R = SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size); assert(!R.isInvalid()); diff --git a/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip b/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip new file mode 100644 index 0000000000000..e049edabd1c51 --- /dev/null +++ b/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -fsyntax-only -fcuda-is-device -verify %s + +// Test that __builtin_amdgcn_global_load_lds and related builtins accept +// instantiation-dependent expressions (e.g., template parameters, sizeof(T)) +// as the size argument. These should be accepted during template definition +// and checked during template instantiation. + +#define __device__ __attribute__((device)) +#define __shared__ __attribute__((shared)) + +using size_t = decltype(sizeof(int)); + +template<int N> +__device__ void test_global_load_lds_template(void* src, __shared__ void* dst) { + // Template parameter should be accepted as size argument + __builtin_amdgcn_global_load_lds(src, dst, N, 0, 0); // #load_lds +} + +template<int N> +__device__ void test_load_to_lds_template(void* src, __shared__ void* dst) { + __builtin_amdgcn_load_to_lds(src, dst, N, 0, 0); // #load_to_lds +} + +// Test with valid sizes - these should compile without errors +template __device__ void test_global_load_lds_template<1>(void*, __shared__ void*); +template __device__ void test_global_load_lds_template<2>(void*, __shared__ void*); +template __device__ void test_global_load_lds_template<4>(void*, __shared__ void*); +template __device__ void test_global_load_lds_template<12>(void*, __shared__ void*); +template __device__ void test_global_load_lds_template<16>(void*, __shared__ void*); + +template __device__ void test_load_to_lds_template<1>(void*, __shared__ void*); +template __device__ void test_load_to_lds_template<2>(void*, __shared__ void*); +template __device__ void test_load_to_lds_template<4>(void*, __shared__ void*); +template __device__ void test_load_to_lds_template<12>(void*, __shared__ void*); +template __device__ void test_load_to_lds_template<16>(void*, __shared__ void*); + +// Test with constexpr computed from template parameter +template<int N> +__device__ void test_computed_size(void* src, __shared__ void* dst) { + constexpr int Size = N * 2; + __builtin_amdgcn_global_load_lds(src, dst, Size, 0, 0); // #computed +} + +template __device__ void test_computed_size<2>(void*, __shared__ void*); // Size = 4 +template __device__ void test_computed_size<8>(void*, __shared__ void*); // Size = 16 + +// Test that invalid sizes are caught at instantiation time +template __device__ void test_global_load_lds_template<5>(void*, __shared__ void*); // #inst1 +// expected-error@#load_lds {{invalid size value}} +// expected-note@#load_lds {{size must be 1, 2, 4, 12 or 16}} +// expected-note@#inst1 {{in instantiation of function template specialization 'test_global_load_lds_template<5>' requested here}} + +template __device__ void test_load_to_lds_template<7>(void*, __shared__ void*); // #inst2 +// expected-error@#load_to_lds {{invalid size value}} +// expected-note@#load_to_lds {{size must be 1, 2, 4, 12 or 16}} +// expected-note@#inst2 {{in instantiation of function template specialization 'test_load_to_lds_template<7>' requested here}} + +template __device__ void test_computed_size<3>(void*, __shared__ void*); // #inst3 +// expected-error@#computed {{invalid size value}} +// expected-note@#computed {{size must be 1, 2, 4, 12 or 16}} +// expected-note@#inst3 {{in instantiation of function template specialization 'test_computed_size<3>' requested here}} + +// Test with sizeof(T) - this is instantiation-dependent but NOT value-dependent. +// The type of sizeof is always size_t, but the value depends on T. +template<typename T> +__device__ void test_sizeof_type(void* src, __shared__ void* dst) { + __builtin_amdgcn_global_load_lds(src, dst, sizeof(T), 0, 0); // #sizeof_type +} + +// Valid: sizeof(char) = 1, sizeof(short) = 2, sizeof(int) = 4 +template __device__ void test_sizeof_type<char>(void*, __shared__ void*); +template __device__ void test_sizeof_type<short>(void*, __shared__ void*); +template __device__ void test_sizeof_type<int>(void*, __shared__ void*); + +// Invalid: sizeof(double) = 8 (not a valid size) +struct Eight { char x[8]; }; +template __device__ void test_sizeof_type<Eight>(void*, __shared__ void*); // #inst_sizeof +// expected-error@#sizeof_type {{invalid size value}} +// expected-note@#sizeof_type {{size must be 1, 2, 4, 12 or 16}} +// expected-note@#inst_sizeof {{in instantiation of function template specialization 'test_sizeof_type<Eight>' requested here}} + +// Test with expression involving both type and non-type template parameters +template<typename T, int N> +__device__ void test_mixed_dependent(void* src, __shared__ void* dst) { + __builtin_amdgcn_global_load_lds(src, dst, sizeof(T) * N, 0, 0); // #mixed +} + +// Valid: sizeof(short) * 2 = 4 +template __device__ void test_mixed_dependent<short, 2>(void*, __shared__ void*); +// Valid: sizeof(int) * 4 = 16 +template __device__ void test_mixed_dependent<int, 4>(void*, __shared__ void*); + +// Invalid: sizeof(int) * 2 = 8 +template __device__ void test_mixed_dependent<int, 2>(void*, __shared__ void*); // #inst_mixed +// expected-error@#mixed {{invalid size value}} +// expected-note@#mixed {{size must be 1, 2, 4, 12 or 16}} +// expected-note@#inst_mixed {{in instantiation of function template specialization 'test_mixed_dependent<int, 2>' requested here}} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
