https://github.com/yxsamliu updated 
https://github.com/llvm/llvm-project/pull/175767

>From d530dd7d961f9c38abc286a8d95e7f46ed87e107 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <[email protected]>
Date: Tue, 13 Jan 2026 09:35:20 -0500
Subject: [PATCH] [AMDGPU] Fix builtin crash with template parameter size
 argument

__builtin_amdgcn_global_load_lds and related builtins (raw_ptr_buffer_load_lds,
struct_ptr_buffer_load_lds, load_to_lds) crash when the size argument is
instantiation-dependent (e.g., a template parameter or sizeof(T)) because
the semantic checker calls VerifyIntegerConstantExpression without first
checking if the expression is instantiation-dependent.

This causes an assertion failure:
  "Expression evaluator can't be called on a dependent expression."

The fix adds an isInstantiationDependent() check before evaluating the
constant expression. Instantiation-dependent expressions are accepted
during template definition and properly checked during template
instantiation.

Fixes a regression reported by the FBGEMM team when building with ROCm 7.2
for gfx950.
---
 clang/lib/Sema/SemaAMDGPU.cpp                 |  4 +
 .../amdgpu-global-load-lds-template.hip       | 97 +++++++++++++++++++
 2 files changed, 101 insertions(+)
 create mode 100644 clang/test/SemaHIP/amdgpu-global-load-lds-template.hip

diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 9d154c65c932e..b6eebf35296ef 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -44,6 +44,10 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned 
BuiltinID,
     constexpr const int SizeIdx = 2;
     llvm::APSInt Size;
     Expr *ArgExpr = TheCall->getArg(SizeIdx);
+    // Check for instantiation-dependent expressions (e.g., involving template
+    // parameters). These will be checked again during template instantiation.
+    if (ArgExpr->isInstantiationDependent())
+      return false;
     [[maybe_unused]] ExprResult R =
         SemaRef.VerifyIntegerConstantExpression(ArgExpr, &Size);
     assert(!R.isInvalid());
diff --git a/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip 
b/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip
new file mode 100644
index 0000000000000..e049edabd1c51
--- /dev/null
+++ b/clang/test/SemaHIP/amdgpu-global-load-lds-template.hip
@@ -0,0 +1,97 @@
+// RUN: %clang_cc1 -std=c++20 -triple amdgcn -target-cpu gfx950 -fsyntax-only 
-fcuda-is-device -verify %s
+
+// Test that __builtin_amdgcn_global_load_lds and related builtins accept
+// instantiation-dependent expressions (e.g., template parameters, sizeof(T))
+// as the size argument. These should be accepted during template definition
+// and checked during template instantiation.
+
+#define __device__ __attribute__((device))
+#define __shared__ __attribute__((shared))
+
+using size_t = decltype(sizeof(int));
+
+template<int N>
+__device__ void test_global_load_lds_template(void* src, __shared__ void* dst) 
{
+    // Template parameter should be accepted as size argument
+    __builtin_amdgcn_global_load_lds(src, dst, N, 0, 0); // #load_lds
+}
+
+template<int N>
+__device__ void test_load_to_lds_template(void* src, __shared__ void* dst) {
+    __builtin_amdgcn_load_to_lds(src, dst, N, 0, 0); // #load_to_lds
+}
+
+// Test with valid sizes - these should compile without errors
+template __device__ void test_global_load_lds_template<1>(void*, __shared__ 
void*);
+template __device__ void test_global_load_lds_template<2>(void*, __shared__ 
void*);
+template __device__ void test_global_load_lds_template<4>(void*, __shared__ 
void*);
+template __device__ void test_global_load_lds_template<12>(void*, __shared__ 
void*);
+template __device__ void test_global_load_lds_template<16>(void*, __shared__ 
void*);
+
+template __device__ void test_load_to_lds_template<1>(void*, __shared__ void*);
+template __device__ void test_load_to_lds_template<2>(void*, __shared__ void*);
+template __device__ void test_load_to_lds_template<4>(void*, __shared__ void*);
+template __device__ void test_load_to_lds_template<12>(void*, __shared__ 
void*);
+template __device__ void test_load_to_lds_template<16>(void*, __shared__ 
void*);
+
+// Test with constexpr computed from template parameter
+template<int N>
+__device__ void test_computed_size(void* src, __shared__ void* dst) {
+    constexpr int Size = N * 2;
+    __builtin_amdgcn_global_load_lds(src, dst, Size, 0, 0); // #computed
+}
+
+template __device__ void test_computed_size<2>(void*, __shared__ void*);  // 
Size = 4
+template __device__ void test_computed_size<8>(void*, __shared__ void*);  // 
Size = 16
+
+// Test that invalid sizes are caught at instantiation time
+template __device__ void test_global_load_lds_template<5>(void*, __shared__ 
void*); // #inst1
+// expected-error@#load_lds {{invalid size value}}
+// expected-note@#load_lds {{size must be 1, 2, 4, 12 or 16}}
+// expected-note@#inst1 {{in instantiation of function template specialization 
'test_global_load_lds_template<5>' requested here}}
+
+template __device__ void test_load_to_lds_template<7>(void*, __shared__ 
void*); // #inst2
+// expected-error@#load_to_lds {{invalid size value}}
+// expected-note@#load_to_lds {{size must be 1, 2, 4, 12 or 16}}
+// expected-note@#inst2 {{in instantiation of function template specialization 
'test_load_to_lds_template<7>' requested here}}
+
+template __device__ void test_computed_size<3>(void*, __shared__ void*);  // 
#inst3
+// expected-error@#computed {{invalid size value}}
+// expected-note@#computed {{size must be 1, 2, 4, 12 or 16}}
+// expected-note@#inst3 {{in instantiation of function template specialization 
'test_computed_size<3>' requested here}}
+
+// Test with sizeof(T) - this is instantiation-dependent but NOT 
value-dependent.
+// The type of sizeof is always size_t, but the value depends on T.
+template<typename T>
+__device__ void test_sizeof_type(void* src, __shared__ void* dst) {
+    __builtin_amdgcn_global_load_lds(src, dst, sizeof(T), 0, 0); // 
#sizeof_type
+}
+
+// Valid: sizeof(char) = 1, sizeof(short) = 2, sizeof(int) = 4
+template __device__ void test_sizeof_type<char>(void*, __shared__ void*);
+template __device__ void test_sizeof_type<short>(void*, __shared__ void*);
+template __device__ void test_sizeof_type<int>(void*, __shared__ void*);
+
+// Invalid: sizeof(double) = 8 (not a valid size)
+struct Eight { char x[8]; };
+template __device__ void test_sizeof_type<Eight>(void*, __shared__ void*); // 
#inst_sizeof
+// expected-error@#sizeof_type {{invalid size value}}
+// expected-note@#sizeof_type {{size must be 1, 2, 4, 12 or 16}}
+// expected-note@#inst_sizeof {{in instantiation of function template 
specialization 'test_sizeof_type<Eight>' requested here}}
+
+// Test with expression involving both type and non-type template parameters
+template<typename T, int N>
+__device__ void test_mixed_dependent(void* src, __shared__ void* dst) {
+    __builtin_amdgcn_global_load_lds(src, dst, sizeof(T) * N, 0, 0); // #mixed
+}
+
+// Valid: sizeof(short) * 2 = 4
+template __device__ void test_mixed_dependent<short, 2>(void*, __shared__ 
void*);
+// Valid: sizeof(int) * 4 = 16
+template __device__ void test_mixed_dependent<int, 4>(void*, __shared__ void*);
+
+// Invalid: sizeof(int) * 2 = 8
+template __device__ void test_mixed_dependent<int, 2>(void*, __shared__ 
void*); // #inst_mixed
+// expected-error@#mixed {{invalid size value}}
+// expected-note@#mixed {{size must be 1, 2, 4, 12 or 16}}
+// expected-note@#inst_mixed {{in instantiation of function template 
specialization 'test_mixed_dependent<int, 2>' requested here}}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to