https://github.com/bob80905 updated https://github.com/llvm/llvm-project/pull/195742
>From 7212c04362517b324db8dd75d40c0b929aa85f13 Mon Sep 17 00:00:00 2001 From: Joshua Batista <[email protected]> Date: Mon, 4 May 2026 13:56:40 -0700 Subject: [PATCH 1/5] first attempt --- clang/include/clang/Basic/Builtins.td | 6 ++ clang/lib/CodeGen/CGHLSLBuiltins.cpp | 35 +++++++++++ clang/lib/CodeGen/CGHLSLRuntime.h | 1 + .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 40 +++++++++++++ clang/lib/Sema/SemaHLSL.cpp | 44 ++++++++++++++ .../CodeGenHLSL/builtins/InterlockedAdd.hlsl | 59 +++++++++++++++++++ .../BuiltIns/InterlockedAdd-errors.hlsl | 42 +++++++++++++ .../InterlockedAdd-groupshared-warning.hlsl | 41 +++++++++++++ llvm/include/llvm/IR/IntrinsicsDirectX.td | 4 ++ llvm/include/llvm/IR/IntrinsicsSPIRV.td | 4 ++ .../Target/DirectX/DXILIntrinsicExpansion.cpp | 16 +++++ .../Target/SPIRV/SPIRVInstructionSelector.cpp | 32 ++++++++++ llvm/test/CodeGen/DirectX/InterlockedAdd.ll | 27 +++++++++ .../SPIRV/hlsl-intrinsics/InterlockedAdd.ll | 20 +++++++ 14 files changed, 371 insertions(+) create mode 100644 clang/test/CodeGenHLSL/builtins/InterlockedAdd.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl create mode 100644 clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl create mode 100644 llvm/test/CodeGen/DirectX/InterlockedAdd.ll create mode 100644 llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedAdd.ll diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 4a7eaeb3d353e..c1646df4ad2fa 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5503,6 +5503,12 @@ def HLSLWaveActiveBitAnd : LangBuiltin<"HLSL_LANG"> { let Prototype = "void (...)"; } +def HLSLInterlockedAdd : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_interlocked_add"]; + let Attributes = [NoThrow]; + let Prototype = "void (...)"; +} + def HLSLWaveActiveBallot : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_wave_active_ballot"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index b82a237ecefca..ddcc6f1980dd7 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -1382,6 +1382,41 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, &CGM.getModule(), ID, {Op->getType()}), ArrayRef{Op}, "hlsl.wave.active.bit.and"); } + case Builtin::BI__builtin_hlsl_interlocked_add: { + // HLSL signatures: + // void InterlockedAdd(inout T dest, T value); + // void InterlockedAdd(inout T dest, T value, out T original_value); + // The `inout` / `out` parameters are wrapped in HLSLOutArgExpr by Sema, so + // we can unconditionally cast and use the underlying lvalue directly. This + // ensures the atomic targets the original storage rather than the + // writeback temporary. + assert(isa<HLSLOutArgExpr>(E->getArg(0)) && + "InterlockedAdd dest argument must be an HLSLOutArgExpr (inout)"); + const auto *DestOut = cast<HLSLOutArgExpr>(E->getArg(0)); + LValue DestLV = EmitLValue(DestOut->getArgLValue()); + Value *Ptr = DestLV.getAddress().emitRawPointer(*this); + Value *Val = EmitScalarExpr(E->getArg(1)); + assert(E->getArg(1)->getType()->isIntegerType() && + "Intrinsic InterlockedAdd value operand must be an integer"); + + Intrinsic::ID ID = CGM.getHLSLRuntime().getInterlockedAddIntrinsic(); + Value *Call = EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID, + {Val->getType(), Ptr->getType()}), + ArrayRef<Value *>{Ptr, Val}, "hlsl.interlocked.add"); + + // The 3-arg overload writes the old value (the intrinsic's return value) + // into the `out original_value` parameter. + if (E->getNumArgs() == 3) { + assert(isa<HLSLOutArgExpr>(E->getArg(2)) && + "InterlockedAdd original_value argument must be an HLSLOutArgExpr " + "(out)"); + const auto *OrigOut = cast<HLSLOutArgExpr>(E->getArg(2)); + LValue OrigLV = EmitLValue(OrigOut->getArgLValue()); + EmitStoreThroughLValue(RValue::get(Call), OrigLV); + } + return Call; + } case Builtin::BI__builtin_hlsl_wave_active_ballot: { [[maybe_unused]] Value *Op = EmitScalarExpr(E->getArg(0)); assert(Op->getType()->isIntegerTy(1) && diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index b54cbab906056..4549e5f725790 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -150,6 +150,7 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitOr, wave_reduce_or) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitXor, wave_reduce_xor) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveBitAnd, wave_reduce_and) + GENERATE_HLSL_INTRINSIC_FUNCTION(InterlockedAdd, interlocked_add) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMax, wave_reduce_max) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveUMax, wave_reduce_umax) GENERATE_HLSL_INTRINSIC_FUNCTION(WaveActiveMin, wave_reduce_min) diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index 3ee56b597da30..b95fb2525ebd9 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -218,5 +218,45 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) __detail::enable_if_t<__detail::is_arithmetic<T>::Value, vector<T, 4>> select( vector<bool, 4>, T, T); +//===----------------------------------------------------------------------===// +// InterlockedAdd builtins +//===----------------------------------------------------------------------===// + +/// \fn void InterlockedAdd(inout T dest, T value) +/// \fn void InterlockedAdd(inout T dest, T value, out T original_value) +/// \brief Performs a guaranteed atomic add of \a value to \a dest. +/// \param dest [inout] The destination memory location. +/// \param value [in] The value to add. +/// \param original_value [out] Optional. Receives the original value of +/// \a dest before the atomic add. + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout int, int); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout int, int, out int); + +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout uint, uint); +_HLSL_AVAILABILITY(shadermodel, 6.0) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout uint, uint, out uint); + +_HLSL_AVAILABILITY(shadermodel, 6.6) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout int64_t, int64_t); +_HLSL_AVAILABILITY(shadermodel, 6.6) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout int64_t, int64_t, out int64_t); + +_HLSL_AVAILABILITY(shadermodel, 6.6) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout uint64_t, uint64_t); +_HLSL_AVAILABILITY(shadermodel, 6.6) +_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) +void InterlockedAdd(inout uint64_t, uint64_t, out uint64_t); + } // namespace hlsl #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ \ No newline at end of file diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index aba1c5072a5fc..73f6fbfabc897 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4375,6 +4375,50 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { TheCall->setType(ArgTyExpr); break; } + case Builtin::BI__builtin_hlsl_interlocked_add: { + // void InterlockedAdd(inout T dest, T value); + // void InterlockedAdd(inout T dest, T value, out T original_value); + unsigned NumArgs = TheCall->getNumArgs(); + if (NumArgs != 2 && NumArgs != 3) { + if (SemaRef.checkArgCount(TheCall, 2)) + return true; + } + + // The destination must be an integer scalar lvalue. + Expr *DestArg = TheCall->getArg(0); + QualType DestTy = DestArg->getType().getNonReferenceType(); + if (!DestTy->isIntegerType() || DestTy->isBooleanType()) { + SemaRef.Diag(DestArg->getBeginLoc(), diag::err_builtin_invalid_arg_type) + << DestTy << SemaRef.Context.UnsignedIntTy << 1 << 0 << 0; + return true; + } + + // The value argument must have the same integer type as the destination. + Expr *ValArg = TheCall->getArg(1); + QualType ValTy = ValArg->getType(); + if (!SemaRef.Context.hasSameUnqualifiedType(DestTy, ValTy)) { + SemaRef.Diag(ValArg->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << ValTy << DestTy << 4 << 0 << 0; + return true; + } + + // The optional original_value argument must also match the destination + // type. + if (NumArgs == 3) { + Expr *OrigArg = TheCall->getArg(2); + QualType OrigTy = OrigArg->getType().getNonReferenceType(); + if (!SemaRef.Context.hasSameUnqualifiedType(DestTy, OrigTy)) { + SemaRef.Diag(OrigArg->getBeginLoc(), + diag::err_typecheck_convert_incompatible) + << OrigTy << DestTy << 4 << 0 << 0; + return true; + } + } + + TheCall->setType(SemaRef.Context.VoidTy); + break; + } // Note these are llvm builtins that we want to catch invalid intrinsic // generation. Normal handling of these builtins will occur elsewhere. case Builtin::BI__builtin_elementwise_bitreverse: { diff --git a/clang/test/CodeGenHLSL/builtins/InterlockedAdd.hlsl b/clang/test/CodeGenHLSL/builtins/InterlockedAdd.hlsl new file mode 100644 index 0000000000000..0bf60973648e5 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/InterlockedAdd.hlsl @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ +// RUN: dxil-pc-shadermodel6.6-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,DXCHECK + +// RUN: %clang_cc1 -std=hlsl2021 -finclude-default-header -triple \ +// RUN: spirv-pc-vulkan-compute %s -emit-llvm -disable-llvm-passes -o - | \ +// RUN: FileCheck %s --check-prefixes=CHECK,SPVCHECK + +// Test basic lowering of HLSL InterlockedAdd to the target intrinsic. + +groupshared int gs_i32; +groupshared uint gs_u32; +groupshared int64_t gs_i64; +groupshared uint64_t gs_u64; + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int_2arg +// DXCHECK: call i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +export void test_int_2arg(int v) { + InterlockedAdd(gs_i32, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint_2arg +// DXCHECK: call i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// SPVCHECK: call spir_func i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +export void test_uint_2arg(uint v) { + InterlockedAdd(gs_u32, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int_3arg +// DXCHECK: %[[R:.*]] = call i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_i32{{.*}}, i32 %{{.*}}) +// CHECK: store i32 %[[R]], ptr {{.*}} +export void test_int_3arg(int v, out int orig) { + InterlockedAdd(gs_i32, v, orig); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint_3arg +// DXCHECK: %[[R:.*]] = call i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3) {{.*}}@gs_u32{{.*}}, i32 %{{.*}}) +// CHECK: store i32 %[[R]], ptr {{.*}} +export void test_uint_3arg(uint v, out uint orig) { + InterlockedAdd(gs_u32, v, orig); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_int64_2arg +// DXCHECK: call i64 @llvm.dx.interlocked.add.i64.p3(ptr addrspace(3) {{.*}}@gs_i64{{.*}}, i64 %{{.*}}) +// SPVCHECK: call spir_func i64 @llvm.spv.interlocked.add.i64.p3(ptr addrspace(3) {{.*}}@gs_i64{{.*}}, i64 %{{.*}}) +export void test_int64_2arg(int64_t v) { + InterlockedAdd(gs_i64, v); +} + +// CHECK-LABEL: define {{(dso_local |hidden |internal |protected |spir_func )*}}void @{{.*}}test_uint64_3arg +// DXCHECK: %[[R:.*]] = call i64 @llvm.dx.interlocked.add.i64.p3(ptr addrspace(3) {{.*}}@gs_u64{{.*}}, i64 %{{.*}}) +// SPVCHECK: %[[R:.*]] = call spir_func i64 @llvm.spv.interlocked.add.i64.p3(ptr addrspace(3) {{.*}}@gs_u64{{.*}}, i64 %{{.*}}) +// CHECK: store i64 %[[R]], ptr {{.*}} +export void test_uint64_3arg(uint64_t v, out uint64_t orig) { + InterlockedAdd(gs_u64, v, orig); +} diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl new file mode 100644 index 0000000000000..2d5a55b4866d8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl @@ -0,0 +1,42 @@ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ +// RUN: -triple dxil-pc-shadermodel6.6-compute %s -fsyntax-only -verify + +void too_few(int v) { + int dest; + InterlockedAdd(dest); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} +} + +void too_many(int v, int extra) { + int dest; + int o; + InterlockedAdd(dest, v, o, extra); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} +} + +void float_dest(float v) { + float dest; + InterlockedAdd(dest, v); // expected-error{{call to 'InterlockedAdd' is ambiguous}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function}} +} + +void bool_dest(bool v) { + bool dest; + InterlockedAdd(dest, v); // expected-error{{call to 'InterlockedAdd' is ambiguous}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 3 {{candidate function}} +} + +struct S { int x; }; + +void struct_dest(int v) { + S s; + InterlockedAdd(s, v); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} +} + +void mismatched_type(int v) { + int dest; + uint orig; + InterlockedAdd(dest, v, orig); // expected-error{{call to 'InterlockedAdd' is ambiguous}} + // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl new file mode 100644 index 0000000000000..decafc52ba3b3 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ +// RUN: -triple dxil-pc-shadermodel6.6-compute %s -fsyntax-only -verify + +// Verify that calling InterlockedAdd with a groupshared destination produces +// the diagnostic about HLSL inout + groupshared. The warning fires per +// candidate overload considered during resolution. + +groupshared int gs_i32; +groupshared uint gs_u32; +groupshared int64_t gs_i64; +groupshared uint64_t gs_u64; + +void test_2arg_int(int v) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_i32, v); +} + +void test_2arg_uint(uint v) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_u32, v); +} + +void test_2arg_i64(int64_t v) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_i64, v); +} + +void test_2arg_u64(uint64_t v) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_u64, v); +} + +void test_3arg_int(int v, out int orig) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_i32, v, orig); +} + +void test_3arg_uint(uint v, out uint orig) { + // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} + InterlockedAdd(gs_u32, v, orig); +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index f37180ce9084a..aad33252a7a01 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -253,6 +253,10 @@ def int_dx_wave_getlaneindex : DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrCon def int_dx_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_and : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; +def int_dx_interlocked_add : + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, LLVMMatchType<0>], + [IntrArgMemOnly]>; def int_dx_wave_reduce_max : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; def int_dx_wave_reduce_min : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem, IntrTriviallyScalarizable]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index 44e31a1410523..6047bead35bfd 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -141,6 +141,10 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_wave_reduce_or : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_wave_reduce_xor : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_wave_reduce_and : DefaultAttrsIntrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; + def int_spv_interlocked_add : + DefaultAttrsIntrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, LLVMMatchType<0>], + [IntrArgMemOnly]>; def int_spv_subgroup_ballot : ClangBuiltin<"__builtin_spirv_subgroup_ballot">, DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_i1_ty], [IntrConvergent, IntrNoMem]>; def int_spv_wave_reduce_umax : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp index 1a3240bebb259..579d66f3ad070 100644 --- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp +++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.cpp @@ -227,6 +227,7 @@ static bool isIntrinsicExpansion(Function &F) { case Intrinsic::dx_sign: case Intrinsic::dx_step: case Intrinsic::dx_radians: + case Intrinsic::dx_interlocked_add: case Intrinsic::usub_sat: case Intrinsic::vector_reduce_add: case Intrinsic::vector_reduce_fadd: @@ -770,6 +771,18 @@ static Value *expandRadiansIntrinsic(CallInst *Orig) { return Builder.CreateFMul(X, PiOver180); } +static Value *expandInterlockedAddIntrinsic(CallInst *Orig) { + // Lower @llvm.dx.interlocked.add(ptr, val) to `atomicrmw add ptr, val + // monotonic`. HLSL Interlocked operations imply no fence/barrier, which maps + // to monotonic ordering. The instruction's result is the old value, matching + // the intrinsic's return value. + Value *Ptr = Orig->getArgOperand(0); + Value *Val = Orig->getArgOperand(1); + IRBuilder<> Builder(Orig); + return Builder.CreateAtomicRMW(AtomicRMWInst::Add, Ptr, Val, MaybeAlign(), + AtomicOrdering::Monotonic); +} + static bool expandBufferLoadIntrinsic(CallInst *Orig, bool IsRaw) { IRBuilder<> Builder(Orig); @@ -1231,6 +1244,9 @@ static bool expandIntrinsic(Function &F, CallInst *Orig) { case Intrinsic::dx_radians: Result = expandRadiansIntrinsic(Orig); break; + case Intrinsic::dx_interlocked_add: + Result = expandInterlockedAddIntrinsic(Orig); + break; case Intrinsic::dx_resource_load_rawbuffer: if (expandBufferLoadIntrinsic(Orig, /*IsRaw*/ true)) return true; diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index aee3a29c6e42b..a2ef49a826155 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -177,6 +177,9 @@ class SPIRVInstructionSelector : public InstructionSelector { bool selectAtomicRMW(Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I, unsigned NewOpcode, unsigned NegateOpcode = 0) const; + bool selectInterlockedAdd(Register ResVReg, SPIRVTypeInst ResType, + MachineInstr &I) const; + bool selectAtomicCmpXchg(Register ResVReg, SPIRVTypeInst ResType, MachineInstr &I) const; @@ -2314,6 +2317,33 @@ bool SPIRVInstructionSelector::selectAtomicRMW(Register ResVReg, return true; } +bool SPIRVInstructionSelector::selectInterlockedAdd(Register ResVReg, + SPIRVTypeInst ResType, + MachineInstr &I) const { + Register Ptr = I.getOperand(2).getReg(); + Register Value = I.getOperand(3).getReg(); + + SPIRV::StorageClass::StorageClass SC = GR.getPointerStorageClass(Ptr); + uint32_t Scope = static_cast<uint32_t>(SC == SPIRV::StorageClass::Workgroup + ? SPIRV::Scope::Workgroup + : SPIRV::Scope::Device); + Register ScopeReg = buildI32Constant(Scope, I); + + uint32_t MemSem = static_cast<uint32_t>(getMemSemanticsForStorageClass(SC)) | + static_cast<uint32_t>(SPIRV::MemorySemantics::None); + Register MemSemReg = buildI32Constant(MemSem, I); + + BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpAtomicIAdd)) + .addDef(ResVReg) + .addUse(GR.getSPIRVTypeID(ResType)) + .addUse(Ptr) + .addUse(ScopeReg) + .addUse(MemSemReg) + .addUse(Value) + .constrainAllUses(TII, TRI, RBI); + return true; +} + bool SPIRVInstructionSelector::selectUnmergeValues(MachineInstr &I) const { unsigned ArgI = I.getNumOperands() - 1; Register SrcReg = @@ -5003,6 +5033,8 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg, case Intrinsic::spv_wave_reduce_and: return selectWaveReduceOp(ResVReg, ResType, I, SPIRV::OpGroupNonUniformBitwiseAnd); + case Intrinsic::spv_interlocked_add: + return selectInterlockedAdd(ResVReg, ResType, I); case Intrinsic::spv_wave_reduce_umax: return selectWaveReduceMax(ResVReg, ResType, I, /*IsUnsigned*/ true); case Intrinsic::spv_wave_reduce_max: diff --git a/llvm/test/CodeGen/DirectX/InterlockedAdd.ll b/llvm/test/CodeGen/DirectX/InterlockedAdd.ll new file mode 100644 index 0000000000000..7c023f233196a --- /dev/null +++ b/llvm/test/CodeGen/DirectX/InterlockedAdd.ll @@ -0,0 +1,27 @@ +; RUN: opt -S -dxil-intrinsic-expansion -mtriple=dxil-pc-shadermodel6.6-compute %s | FileCheck %s + +; Verify llvm.dx.interlocked.add expands to atomicrmw add monotonic. + +@gs_i32 = internal addrspace(3) global i32 zeroinitializer +@gs_i64 = internal addrspace(3) global i64 zeroinitializer + +define i32 @test_i32(i32 %v) { +entry: +; CHECK-LABEL: @test_i32 +; CHECK: %[[R:.*]] = atomicrmw add ptr addrspace(3) @gs_i32, i32 %v monotonic +; CHECK: ret i32 %[[R]] + %r = call i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3) @gs_i32, i32 %v) + ret i32 %r +} + +define i64 @test_i64(i64 %v) { +entry: +; CHECK-LABEL: @test_i64 +; CHECK: %[[R:.*]] = atomicrmw add ptr addrspace(3) @gs_i64, i64 %v monotonic +; CHECK: ret i64 %[[R]] + %r = call i64 @llvm.dx.interlocked.add.i64.p3(ptr addrspace(3) @gs_i64, i64 %v) + ret i64 %r +} + +declare i32 @llvm.dx.interlocked.add.i32.p3(ptr addrspace(3), i32) +declare i64 @llvm.dx.interlocked.add.i64.p3(ptr addrspace(3), i64) diff --git a/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedAdd.ll b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedAdd.ll new file mode 100644 index 0000000000000..6cb497eca5021 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/hlsl-intrinsics/InterlockedAdd.ll @@ -0,0 +1,20 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-vulkan-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val --target-env spv1.4 %} + +; Test lowering of llvm.spv.interlocked.add to OpAtomicIAdd in workgroup storage. + +; CHECK-DAG: %[[#uint:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#scope_wg:]] = OpConstant %[[#uint]] 2 +; CHECK-DAG: %[[#mem_wg:]] = OpConstant %[[#uint]] 256 + +@gs_i32 = internal addrspace(3) global i32 zeroinitializer + +; CHECK-LABEL: Begin function test_i32 +define i32 @test_i32(i32 %v) { +entry: +; CHECK: %[[#R:]] = OpAtomicIAdd %[[#uint]] %[[#]] %[[#scope_wg]] %[[#mem_wg]] %[[#]] + %r = call i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3) @gs_i32, i32 %v) + ret i32 %r +} + +declare i32 @llvm.spv.interlocked.add.i32.p3(ptr addrspace(3), i32) >From bc4257d1d7c03ed9033dc8a0244c581d4352fe06 Mon Sep 17 00:00:00 2001 From: Joshua Batista <[email protected]> Date: Mon, 4 May 2026 14:34:32 -0700 Subject: [PATCH 2/5] self review --- clang/lib/CodeGen/CGHLSLBuiltins.cpp | 4 +--- clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl | 3 ++- .../SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl | 3 ++- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index ddcc6f1980dd7..5750fc31156ca 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -1387,9 +1387,7 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, // void InterlockedAdd(inout T dest, T value); // void InterlockedAdd(inout T dest, T value, out T original_value); // The `inout` / `out` parameters are wrapped in HLSLOutArgExpr by Sema, so - // we can unconditionally cast and use the underlying lvalue directly. This - // ensures the atomic targets the original storage rather than the - // writeback temporary. + // we can unconditionally cast and use the underlying lvalue directly. assert(isa<HLSLOutArgExpr>(E->getArg(0)) && "InterlockedAdd dest argument must be an HLSLOutArgExpr (inout)"); const auto *DestOut = cast<HLSLOutArgExpr>(E->getArg(0)); diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl index 2d5a55b4866d8..878560388dc2a 100644 --- a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ -// RUN: -triple dxil-pc-shadermodel6.6-compute %s -fsyntax-only -verify +// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ +// RUN: -disable-llvm-passes -verify void too_few(int v) { int dest; diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl index decafc52ba3b3..33413edf7d86f 100644 --- a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ -// RUN: -triple dxil-pc-shadermodel6.6-compute %s -fsyntax-only -verify +// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ +// RUN: -disable-llvm-passes -verify // Verify that calling InterlockedAdd with a groupshared destination produces // the diagnostic about HLSL inout + groupshared. The warning fires per >From 375e2659d418756d6c1f9834bd02d63b1b639019 Mon Sep 17 00:00:00 2001 From: Joshua Batista <[email protected]> Date: Mon, 11 May 2026 12:13:12 -0700 Subject: [PATCH 3/5] address Chris --- .../clang/Sema/HLSLExternalSemaSource.h | 1 + clang/lib/CodeGen/CGHLSLBuiltins.cpp | 24 +++--- .../lib/Headers/hlsl/hlsl_alias_intrinsics.h | 40 ---------- clang/lib/Sema/HLSLExternalSemaSource.cpp | 75 +++++++++++++++++++ clang/lib/Sema/SemaHLSL.cpp | 43 +---------- 5 files changed, 88 insertions(+), 95 deletions(-) diff --git a/clang/include/clang/Sema/HLSLExternalSemaSource.h b/clang/include/clang/Sema/HLSLExternalSemaSource.h index 049fc7b8fe3f2..77fd3fafc6d77 100644 --- a/clang/include/clang/Sema/HLSLExternalSemaSource.h +++ b/clang/include/clang/Sema/HLSLExternalSemaSource.h @@ -46,6 +46,7 @@ class HLSLExternalSemaSource : public ExternalSemaSource { void defineHLSLVectorAlias(); void defineHLSLMatrixAlias(); void defineHLSLTypesWithForwardDeclarations(); + void defineHLSLAtomicIntrinsics(); void onCompletion(CXXRecordDecl *Record, CompletionFunction Fn); }; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 5750fc31156ca..6ed415e785039 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -1383,15 +1383,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef{Op}, "hlsl.wave.active.bit.and"); } case Builtin::BI__builtin_hlsl_interlocked_add: { - // HLSL signatures: - // void InterlockedAdd(inout T dest, T value); - // void InterlockedAdd(inout T dest, T value, out T original_value); - // The `inout` / `out` parameters are wrapped in HLSLOutArgExpr by Sema, so - // we can unconditionally cast and use the underlying lvalue directly. - assert(isa<HLSLOutArgExpr>(E->getArg(0)) && - "InterlockedAdd dest argument must be an HLSLOutArgExpr (inout)"); - const auto *DestOut = cast<HLSLOutArgExpr>(E->getArg(0)); - LValue DestLV = EmitLValue(DestOut->getArgLValue()); + // HLSL signatures (synthesized as overloads in HLSLExternalSemaSource): + // void InterlockedAdd(groupshared|device T &dest, T value); + // void InterlockedAdd(groupshared|device T &dest, T value, + // T &original_value); + // Both `dest` and `original_value` are plain references, so we can use + // the underlying lvalue directly without HLSLOutArgExpr unwrapping. + LValue DestLV = EmitLValue(E->getArg(0)); Value *Ptr = DestLV.getAddress().emitRawPointer(*this); Value *Val = EmitScalarExpr(E->getArg(1)); assert(E->getArg(1)->getType()->isIntegerType() && @@ -1404,13 +1402,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, ArrayRef<Value *>{Ptr, Val}, "hlsl.interlocked.add"); // The 3-arg overload writes the old value (the intrinsic's return value) - // into the `out original_value` parameter. + // into the `original_value` reference parameter. if (E->getNumArgs() == 3) { - assert(isa<HLSLOutArgExpr>(E->getArg(2)) && - "InterlockedAdd original_value argument must be an HLSLOutArgExpr " - "(out)"); - const auto *OrigOut = cast<HLSLOutArgExpr>(E->getArg(2)); - LValue OrigLV = EmitLValue(OrigOut->getArgLValue()); + LValue OrigLV = EmitLValue(E->getArg(2)); EmitStoreThroughLValue(RValue::get(Call), OrigLV); } return Call; diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h index b95fb2525ebd9..3ee56b597da30 100644 --- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h @@ -218,45 +218,5 @@ _HLSL_BUILTIN_ALIAS(__builtin_hlsl_select) __detail::enable_if_t<__detail::is_arithmetic<T>::Value, vector<T, 4>> select( vector<bool, 4>, T, T); -//===----------------------------------------------------------------------===// -// InterlockedAdd builtins -//===----------------------------------------------------------------------===// - -/// \fn void InterlockedAdd(inout T dest, T value) -/// \fn void InterlockedAdd(inout T dest, T value, out T original_value) -/// \brief Performs a guaranteed atomic add of \a value to \a dest. -/// \param dest [inout] The destination memory location. -/// \param value [in] The value to add. -/// \param original_value [out] Optional. Receives the original value of -/// \a dest before the atomic add. - -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout int, int); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout int, int, out int); - -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout uint, uint); -_HLSL_AVAILABILITY(shadermodel, 6.0) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout uint, uint, out uint); - -_HLSL_AVAILABILITY(shadermodel, 6.6) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout int64_t, int64_t); -_HLSL_AVAILABILITY(shadermodel, 6.6) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout int64_t, int64_t, out int64_t); - -_HLSL_AVAILABILITY(shadermodel, 6.6) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout uint64_t, uint64_t); -_HLSL_AVAILABILITY(shadermodel, 6.6) -_HLSL_BUILTIN_ALIAS(__builtin_hlsl_interlocked_add) -void InterlockedAdd(inout uint64_t, uint64_t, out uint64_t); - } // namespace hlsl #endif //_HLSL_HLSL_ALIAS_INTRINSICS_H_ \ No newline at end of file diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 235ede8eb0bf0..5293e782eba4f 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -18,7 +18,9 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" #include "clang/AST/Type.h" +#include "clang/Basic/AddressSpaces.h" #include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Preprocessor.h" #include "clang/Sema/Lookup.h" #include "clang/Sema/Sema.h" #include "clang/Sema/SemaHLSL.h" @@ -53,6 +55,7 @@ void HLSLExternalSemaSource::InitializeSema(Sema &S) { (void)HLSLNamespace->getCanonicalDecl()->decls_begin(); defineTrivialHLSLTypes(); defineHLSLTypesWithForwardDeclarations(); + defineHLSLAtomicIntrinsics(); // This adds a `using namespace hlsl` directive. In DXC, we don't put HLSL's // built in types inside a namespace, but we are planning to change that in @@ -639,6 +642,78 @@ void HLSLExternalSemaSource::defineHLSLTypesWithForwardDeclarations() { }); } +// Build a single overload of an HLSL atomic intrinsic in the hlsl namespace. +// `dest` is an address-space-qualified reference; `original_value` (when +// present) is a plain reference. The synthesized FunctionDecl aliases the +// underlying clang builtin via BuiltinAliasAttr. +static void buildAtomicOverload(Sema &S, NamespaceDecl *NS, StringRef FuncName, + StringRef BuiltinName, QualType ElemTy, + LangAS DestAS, bool ThreeArg) { + ASTContext &AST = S.getASTContext(); + + QualType DestTy = + AST.getLValueReferenceType(AST.getAddrSpaceQualType(ElemTy, DestAS)); + QualType OrigRefTy = AST.getLValueReferenceType(ElemTy); + + SmallVector<QualType, 3> ParamTypes; + ParamTypes.push_back(DestTy); + ParamTypes.push_back(ElemTy); + if (ThreeArg) + ParamTypes.push_back(OrigRefTy); + + FunctionProtoType::ExtProtoInfo EPI; + QualType FuncTy = AST.getFunctionType(AST.VoidTy, ParamTypes, EPI); + auto *TSInfo = AST.getTrivialTypeSourceInfo(FuncTy, SourceLocation()); + + IdentifierInfo &FuncII = AST.Idents.get(FuncName, tok::TokenKind::identifier); + DeclarationNameInfo NameInfo(DeclarationName(&FuncII), SourceLocation()); + + FunctionDecl *FD = FunctionDecl::Create( + AST, NS, SourceLocation(), NameInfo, FuncTy, TSInfo, SC_Extern, + /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, + /*hasWrittenPrototype=*/true); + + static const char *const ParamNames[] = {"dest", "value", "original_value"}; + SmallVector<ParmVarDecl *, 3> ParmDecls; + for (unsigned I = 0, E = ParamTypes.size(); I != E; ++I) { + IdentifierInfo &PII = + AST.Idents.get(ParamNames[I], tok::TokenKind::identifier); + ParmVarDecl *Parm = ParmVarDecl::Create( + AST, FD, SourceLocation(), SourceLocation(), &PII, ParamTypes[I], + AST.getTrivialTypeSourceInfo(ParamTypes[I], SourceLocation()), SC_None, + nullptr); + Parm->setScopeInfo(0, I); + ParmDecls.push_back(Parm); + } + FD->setParams(ParmDecls); + + IdentifierInfo &BuiltinII = + S.getPreprocessor().getIdentifierTable().get(BuiltinName); + FD->addAttr(BuiltinAliasAttr::CreateImplicit(AST, &BuiltinII)); + FD->setImplicit(); + NS->addDecl(FD); +} + +// Synthesize the InterlockedAdd overload set: {int, uint, int64_t, uint64_t} +// x {groupshared, device} x {2-arg, 3-arg}. +static void defineHLSLInterlockedAdd(Sema &S, NamespaceDecl *NS) { + ASTContext &AST = S.getASTContext(); + QualType Elems[] = {AST.IntTy, AST.UnsignedIntTy, AST.LongLongTy, + AST.UnsignedLongLongTy}; + LangAS AddrSpaces[] = {LangAS::hlsl_groupshared, LangAS::hlsl_device}; + + for (QualType ElemTy : Elems) + for (LangAS AS : AddrSpaces) + for (bool ThreeArg : {false, true}) + buildAtomicOverload(S, NS, "InterlockedAdd", + "__builtin_hlsl_interlocked_add", ElemTy, AS, + ThreeArg); +} + +void HLSLExternalSemaSource::defineHLSLAtomicIntrinsics() { + defineHLSLInterlockedAdd(*SemaPtr, HLSLNamespace); +} + void HLSLExternalSemaSource::onCompletion(CXXRecordDecl *Record, CompletionFunction Fn) { if (!Record->isCompleteDefinition()) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 73f6fbfabc897..1fe0f592e0435 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -4376,46 +4376,9 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } case Builtin::BI__builtin_hlsl_interlocked_add: { - // void InterlockedAdd(inout T dest, T value); - // void InterlockedAdd(inout T dest, T value, out T original_value); - unsigned NumArgs = TheCall->getNumArgs(); - if (NumArgs != 2 && NumArgs != 3) { - if (SemaRef.checkArgCount(TheCall, 2)) - return true; - } - - // The destination must be an integer scalar lvalue. - Expr *DestArg = TheCall->getArg(0); - QualType DestTy = DestArg->getType().getNonReferenceType(); - if (!DestTy->isIntegerType() || DestTy->isBooleanType()) { - SemaRef.Diag(DestArg->getBeginLoc(), diag::err_builtin_invalid_arg_type) - << DestTy << SemaRef.Context.UnsignedIntTy << 1 << 0 << 0; - return true; - } - - // The value argument must have the same integer type as the destination. - Expr *ValArg = TheCall->getArg(1); - QualType ValTy = ValArg->getType(); - if (!SemaRef.Context.hasSameUnqualifiedType(DestTy, ValTy)) { - SemaRef.Diag(ValArg->getBeginLoc(), - diag::err_typecheck_convert_incompatible) - << ValTy << DestTy << 4 << 0 << 0; - return true; - } - - // The optional original_value argument must also match the destination - // type. - if (NumArgs == 3) { - Expr *OrigArg = TheCall->getArg(2); - QualType OrigTy = OrigArg->getType().getNonReferenceType(); - if (!SemaRef.Context.hasSameUnqualifiedType(DestTy, OrigTy)) { - SemaRef.Diag(OrigArg->getBeginLoc(), - diag::err_typecheck_convert_incompatible) - << OrigTy << DestTy << 4 << 0 << 0; - return true; - } - } - + // Overload resolution against the synthesized FunctionDecls in + // HLSLExternalSemaSource has already validated argument count, integer + // type matching, and the address-space requirement on `dest`. TheCall->setType(SemaRef.Context.VoidTy); break; } >From b81bd14eca59f302fb51f1c5f87b48c4d5fb3703 Mon Sep 17 00:00:00 2001 From: Joshua Batista <[email protected]> Date: Mon, 11 May 2026 14:27:09 -0700 Subject: [PATCH 4/5] update tests with new overload note messages --- clang/lib/Sema/HLSLExternalSemaSource.cpp | 11 ++- .../BuiltIns/InterlockedAdd-errors.hlsl | 92 ++++++++++--------- .../InterlockedAdd-groupshared-warning.hlsl | 42 --------- 3 files changed, 55 insertions(+), 90 deletions(-) delete mode 100644 clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index 5293e782eba4f..b0e61c2144fe7 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -666,11 +666,11 @@ static void buildAtomicOverload(Sema &S, NamespaceDecl *NS, StringRef FuncName, auto *TSInfo = AST.getTrivialTypeSourceInfo(FuncTy, SourceLocation()); IdentifierInfo &FuncII = AST.Idents.get(FuncName, tok::TokenKind::identifier); - DeclarationNameInfo NameInfo(DeclarationName(&FuncII), SourceLocation()); + DeclarationName FuncDeclName(&FuncII); FunctionDecl *FD = FunctionDecl::Create( - AST, NS, SourceLocation(), NameInfo, FuncTy, TSInfo, SC_Extern, - /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, + AST, NS, SourceLocation(), SourceLocation(), FuncDeclName, FuncTy, TSInfo, + SC_Extern, /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/true); static const char *const ParamNames[] = {"dest", "value", "original_value"}; @@ -698,8 +698,9 @@ static void buildAtomicOverload(Sema &S, NamespaceDecl *NS, StringRef FuncName, // x {groupshared, device} x {2-arg, 3-arg}. static void defineHLSLInterlockedAdd(Sema &S, NamespaceDecl *NS) { ASTContext &AST = S.getASTContext(); - QualType Elems[] = {AST.IntTy, AST.UnsignedIntTy, AST.LongLongTy, - AST.UnsignedLongLongTy}; + // HLSL: int64_t == long, uint64_t == unsigned long (see hlsl_basic_types.h). + QualType Elems[] = {AST.IntTy, AST.UnsignedIntTy, AST.LongTy, + AST.UnsignedLongTy}; LangAS AddrSpaces[] = {LangAS::hlsl_groupshared, LangAS::hlsl_device}; for (QualType ElemTy : Elems) diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl index 878560388dc2a..8fb1a5aaa87a1 100644 --- a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl @@ -1,43 +1,49 @@ -// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ -// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ -// RUN: -disable-llvm-passes -verify - -void too_few(int v) { - int dest; - InterlockedAdd(dest); // expected-error{{no matching function for call to 'InterlockedAdd'}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} -} - -void too_many(int v, int extra) { - int dest; - int o; - InterlockedAdd(dest, v, o, extra); // expected-error{{no matching function for call to 'InterlockedAdd'}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} -} - -void float_dest(float v) { - float dest; - InterlockedAdd(dest, v); // expected-error{{call to 'InterlockedAdd' is ambiguous}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function}} -} - -void bool_dest(bool v) { - bool dest; - InterlockedAdd(dest, v); // expected-error{{call to 'InterlockedAdd' is ambiguous}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 3 {{candidate function}} -} - -struct S { int x; }; - -void struct_dest(int v) { - S s; - InterlockedAdd(s, v); // expected-error{{no matching function for call to 'InterlockedAdd'}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 8 {{candidate function}} -} - -void mismatched_type(int v) { - int dest; - uint orig; - InterlockedAdd(dest, v, orig); // expected-error{{call to 'InterlockedAdd' is ambiguous}} - // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function}} -} +// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ +// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ +// RUN: -disable-llvm-passes -verify + +// InterlockedAdd is provided as a set of address-space-qualified overloads +// (groupshared/device, {int,uint,int64_t,uint64_t}, 2-arg/3-arg). All arg +// mismatches surface as "no matching function" with 16 candidates. The +// candidate notes come from synthesized FunctionDecls with no source +// location, so they are matched with `@*:*`. + +groupshared int gs_i32; +groupshared float gs_f32; +struct S { int x; }; +groupshared S gs_s; + +void too_few(int v) { + InterlockedAdd(gs_i32); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} + +void too_many(int v, int extra) { + int o; + InterlockedAdd(gs_i32, v, o, extra); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} + +// Atomics must operate on actual addresses in groupshared or device memory; +// passing a plain local (no address space) must not bind to any overload. +void local_dest(int v) { + int dest; + InterlockedAdd(dest, v); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} + +void float_dest(float v) { + InterlockedAdd(gs_f32, v); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} + +void struct_dest(int v) { + InterlockedAdd(gs_s, v); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} + +void mismatched_orig_type(int v) { + uint orig; + InterlockedAdd(gs_i32, v, orig); // expected-error{{no matching function for call to 'InterlockedAdd'}} + // expected-note@*:* 16 {{candidate function}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl deleted file mode 100644 index 33413edf7d86f..0000000000000 --- a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-groupshared-warning.hlsl +++ /dev/null @@ -1,42 +0,0 @@ -// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header \ -// RUN: -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only \ -// RUN: -disable-llvm-passes -verify - -// Verify that calling InterlockedAdd with a groupshared destination produces -// the diagnostic about HLSL inout + groupshared. The warning fires per -// candidate overload considered during resolution. - -groupshared int gs_i32; -groupshared uint gs_u32; -groupshared int64_t gs_i64; -groupshared uint64_t gs_u64; - -void test_2arg_int(int v) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_i32, v); -} - -void test_2arg_uint(uint v) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_u32, v); -} - -void test_2arg_i64(int64_t v) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_i64, v); -} - -void test_2arg_u64(uint64_t v) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_u64, v); -} - -void test_3arg_int(int v, out int orig) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_i32, v, orig); -} - -void test_3arg_uint(uint v, out uint orig) { - // expected-warning@+1 4 {{passing groupshared variable to a parameter annotated with inout}} - InterlockedAdd(gs_u32, v, orig); -} >From 4d052b49452f5d1a25b5a26a700d6911557c534c Mon Sep 17 00:00:00 2001 From: Joshua Batista <[email protected]> Date: Tue, 12 May 2026 17:18:01 -0700 Subject: [PATCH 5/5] Perform Sema validation, and set up address space validation for future interlocked* functions --- .../clang/Basic/DiagnosticSemaKinds.td | 4 ++ clang/lib/Sema/HLSLExternalSemaSource.cpp | 15 ++--- clang/lib/Sema/SemaHLSL.cpp | 60 ++++++++++++++++++- .../BuiltIns/InterlockedAdd-errors.hlsl | 51 ++++++++++++++++ 4 files changed, 120 insertions(+), 10 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index e059260778631..dd29431a23368 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13691,6 +13691,10 @@ def warn_hlsl_availability_unavailable : def error_hlsl_inout_scalar_extension : Error<"illegal scalar extension cast on argument %0 to %select{|in}1out paramemter">; def error_hlsl_inout_lvalue : Error<"cannot bind non-lvalue argument %0 to %select{|in}1out paramemter">; +def err_hlsl_atomic_arg_addr_space : Error< + "%ordinal0 argument to atomic builtin must reference groupshared or device " + "memory (was %1)">; + def err_hlsl_export_not_on_function : Error< "export declaration can only be used on functions">; diff --git a/clang/lib/Sema/HLSLExternalSemaSource.cpp b/clang/lib/Sema/HLSLExternalSemaSource.cpp index b0e61c2144fe7..e56793739b213 100644 --- a/clang/lib/Sema/HLSLExternalSemaSource.cpp +++ b/clang/lib/Sema/HLSLExternalSemaSource.cpp @@ -25,6 +25,7 @@ #include "clang/Sema/Sema.h" #include "clang/Sema/SemaHLSL.h" #include "clang/Sema/TemplateDeduction.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" using namespace clang; @@ -673,16 +674,16 @@ static void buildAtomicOverload(Sema &S, NamespaceDecl *NS, StringRef FuncName, SC_Extern, /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, /*hasWrittenPrototype=*/true); - static const char *const ParamNames[] = {"dest", "value", "original_value"}; + constexpr const char *ParamNames[] = {"dest", "value", "original_value"}; SmallVector<ParmVarDecl *, 3> ParmDecls; - for (unsigned I = 0, E = ParamTypes.size(); I != E; ++I) { - IdentifierInfo &PII = - AST.Idents.get(ParamNames[I], tok::TokenKind::identifier); + unsigned I = 0; + for (auto [ParamType, ParamName] : llvm::zip(ParamTypes, ParamNames)) { + IdentifierInfo &PII = AST.Idents.get(ParamName, tok::TokenKind::identifier); ParmVarDecl *Parm = ParmVarDecl::Create( - AST, FD, SourceLocation(), SourceLocation(), &PII, ParamTypes[I], - AST.getTrivialTypeSourceInfo(ParamTypes[I], SourceLocation()), SC_None, + AST, FD, SourceLocation(), SourceLocation(), &PII, ParamType, + AST.getTrivialTypeSourceInfo(ParamType, SourceLocation()), SC_None, nullptr); - Parm->setScopeInfo(0, I); + Parm->setScopeInfo(0, I++); ParmDecls.push_back(Parm); } FD->setParams(ParmDecls); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 1fe0f592e0435..4d2e427c507c7 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -3273,6 +3273,20 @@ static bool CheckModifiableLValue(Sema *S, CallExpr *TheCall, return true; } +// Verifies that the argument at `ArgIndex` of `TheCall` refers to memory in +// one of `AllowedSpaces`. Intended for HLSL builtins (e.g. atomics). +static bool CheckArgAddrSpaceOneOf(Sema *S, CallExpr *TheCall, + unsigned ArgIndex, + ArrayRef<LangAS> AllowedSpaces) { + Expr *Arg = TheCall->getArg(ArgIndex); + QualType LValueTy = Arg->IgnoreCasts()->getType(); + if (llvm::is_contained(AllowedSpaces, LValueTy.getAddressSpace())) + return false; + S->Diag(Arg->getBeginLoc(), diag::err_hlsl_atomic_arg_addr_space) + << (ArgIndex + 1) << LValueTy; + return true; +} + static bool CheckNoDoubleVectors(Sema *S, SourceLocation Loc, int ArgOrdinal, clang::QualType PassedType) { const auto *VecTy = PassedType->getAs<VectorType>(); @@ -4376,9 +4390,49 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { break; } case Builtin::BI__builtin_hlsl_interlocked_add: { - // Overload resolution against the synthesized FunctionDecls in - // HLSLExternalSemaSource has already validated argument count, integer - // type matching, and the address-space requirement on `dest`. + // The builtin's prototype in Builtins.td is `void (...)`, so direct calls + // to `__builtin_hlsl_interlocked_add` bypass argument checking entirely. + // When reached via the synthesized `InterlockedAdd` overload set in + // HLSLExternalSemaSource, overload resolution has already enforced the + // argument count, integer-type matching, and the address-space requirement + // on `dest`. The checks below are a safety net for callers that invoke the + // builtin by its mangled name and would otherwise reach CodeGen unchecked. + if (TheCall->getNumArgs() < 2) { + SemaRef.Diag(TheCall->getEndLoc(), + diag::err_typecheck_call_too_few_args_at_least) + << /*callee_type=*/0 << /*min_arg_count=*/2 << TheCall->getNumArgs() + << /*is_non_object=*/0 << TheCall->getSourceRange(); + return true; + } + if (SemaRef.checkArgCountAtMost(TheCall, 3)) + return true; + + QualType DestTy = TheCall->getArg(0)->getType().getUnqualifiedType(); + if (!DestTy->isIntegerType()) { + SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(), + diag::err_builtin_invalid_arg_type) + << /*ordinal=*/1 << /*scalar*/ 1 << /*integer*/ 1 << /*no float*/ 0 + << DestTy; + return true; + } + + if (CheckModifiableLValue(&SemaRef, TheCall, 0)) + return true; + + if (CheckArgAddrSpaceOneOf(&SemaRef, TheCall, 0, + {LangAS::hlsl_groupshared, LangAS::hlsl_device})) + return true; + + if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(1), DestTy)) + return true; + + if (TheCall->getNumArgs() == 3) { + if (CheckArgTypeMatches(&SemaRef, TheCall->getArg(2), DestTy)) + return true; + if (CheckModifiableLValue(&SemaRef, TheCall, 2)) + return true; + } + TheCall->setType(SemaRef.Context.VoidTy); break; } diff --git a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl index 8fb1a5aaa87a1..8e5b352439cb5 100644 --- a/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/InterlockedAdd-errors.hlsl @@ -47,3 +47,54 @@ void mismatched_orig_type(int v) { InterlockedAdd(gs_i32, v, orig); // expected-error{{no matching function for call to 'InterlockedAdd'}} // expected-note@*:* 16 {{candidate function}} } + +// The tests below exercise direct invocations of the underlying clang builtin +// `__builtin_hlsl_interlocked_add`. These bypass overload resolution against +// the synthesized `InterlockedAdd` overload set (the builtin's prototype in +// Builtins.td is `void (...)`), so each error is produced by the explicit +// checks in SemaHLSL.cpp rather than by candidate-set rejection. + +void direct_too_few() { + __builtin_hlsl_interlocked_add(gs_i32); + // expected-error@-1 {{too few arguments to function call, expected at least 2, have 1}} +} + +void direct_too_many(int v, int extra) { + int o; + __builtin_hlsl_interlocked_add(gs_i32, v, o, extra); + // expected-error@-1 {{too many arguments to function call, expected at most 3, have 4}} +} + +void direct_non_integer_dest() { + S local_s; + __builtin_hlsl_interlocked_add(local_s, 1); + // expected-error@-1 {{1st argument must be a scalar integer type (was 'S')}} +} + +void direct_nonlvalue_dest(int v) { + __builtin_hlsl_interlocked_add(1, v); + // expected-error@-1 {{cannot bind non-lvalue argument '1' to out paramemter}} +} + +void direct_mismatched_value() { + uint uv = 1u; + __builtin_hlsl_interlocked_add(gs_i32, uv); + // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of incompatible type 'int'}} +} + +void direct_mismatched_orig(int v) { + uint orig; + __builtin_hlsl_interlocked_add(gs_i32, v, orig); + // expected-error@-1 {{passing 'uint' (aka 'unsigned int') to parameter of incompatible type 'int'}} +} + +void direct_nonlvalue_orig(int v) { + __builtin_hlsl_interlocked_add(gs_i32, v, 1); + // expected-error@-1 {{cannot bind non-lvalue argument '1' to out paramemter}} +} + +void direct_default_as_dest(int v) { + int local; + __builtin_hlsl_interlocked_add(local, v); + // expected-error@-1 {{1st argument to atomic builtin must reference groupshared or device memory (was 'int')}} +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
