https://github.com/hekota updated https://github.com/llvm/llvm-project/pull/169144
>From 7ffd077dc7132f1f9b10cee36fcc61d7b24a7dd3 Mon Sep 17 00:00:00 2001 From: Helena Kotas <[email protected]> Date: Fri, 21 Nov 2025 19:00:47 -0800 Subject: [PATCH 1/2] [HLSL] Update indexed vector elements individually Storing to individual elements of a vector through vector indexing needs to be handled as separate stores. We need to avoid the load/modify/store of the whole vector to prevent overwriting other vector elements that might be getting updated in parallel. Fixes #160208 Fixes #167729 --- clang/lib/CodeGen/CGExpr.cpp | 24 +++++++++++ clang/test/CodeGenHLSL/BoolVector.hlsl | 15 +++---- .../builtins/VectorElementStore.hlsl | 41 +++++++++++++++++++ clang/test/CodeGenHLSL/builtins/lit.hlsl | 6 ++- 4 files changed, 75 insertions(+), 11 deletions(-) create mode 100644 clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f2451b16e78be..763ce6369fc51 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2575,6 +2575,30 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, bool isInit) { if (!Dst.isSimple()) { if (Dst.isVectorElt()) { + if (getLangOpts().HLSL) { + // In HLSL, storing to individual elements of a vector through + // VectorElt LValue needs to be handled as separate store. We need to + // avoid the load/modify/store sequence to prevent overwriting other + // elements that might be getting updated in parallel. + Address DstAddr = Dst.getVectorAddress(); + llvm::Type *DestAddrTy = DstAddr.getElementType(); + llvm::Type *ElemTy = DestAddrTy->getScalarType(); + CharUnits ElemAlign = CharUnits::fromQuantity( + CGM.getDataLayout().getPrefTypeAlign(ElemTy)); + + llvm::Value *Val = Src.getScalarVal(); + if (Val->getType()->getPrimitiveSizeInBits() < + ElemTy->getScalarSizeInBits()) + Val = Builder.CreateZExt(Val, ElemTy->getScalarType()); + + llvm::Value *Idx = Dst.getVectorIdx(); + llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0); + Address DstElemAddr = + Builder.CreateGEP(DstAddr, {Zero, Idx}, DestAddrTy, ElemAlign); + Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified()); + return; + } + // Read/modify/write the vector, inserting the new element. llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(), Dst.isVolatileQualified()); diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl b/clang/test/CodeGenHLSL/BoolVector.hlsl index d5054a5a92b5d..6be90e8f51ce2 100644 --- a/clang/test/CodeGenHLSL/BoolVector.hlsl +++ b/clang/test/CodeGenHLSL/BoolVector.hlsl @@ -69,9 +69,8 @@ bool fn4() { // CHECK-LABEL: define hidden void {{.*}}fn5{{.*}} // CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8 // CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8 -// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8 -// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1 -// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8 +// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[Arr]] +// CHECK-NEXT: store i32 0, ptr [[Ptr]], align 4 // CHECK-NEXT: ret void void fn5() { bool2 Arr = {true,true}; @@ -86,10 +85,9 @@ void fn5() { // CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4 // CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1 // CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], i32 0, i32 0 -// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 1 // CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32 -// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1 -// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 1 +// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[BV]], i32 0, i32 1 +// CHECK-NEXT: store i32 [[Z]], ptr [[Ptr]], align 4 // CHECK-NEXT: ret void void fn6() { bool V = false; @@ -101,9 +99,8 @@ void fn6() { // CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 8 {{.*}}, i32 16, i1 false) // CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr [[Arr]], i32 0, i32 0 -// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8 -// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1 -// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[Idx]], align 8 +// CHECK-NEXT: %[[Ptr:.*]] = getelementptr <2 x i32>, ptr [[Idx]], i32 0, i32 1 +// CHECK-NEXT: store i32 0, ptr %[[Ptr]], align 4 // CHECK-NEXT: ret void void fn7() { bool2 Arr[2] = {{true,true}, {false,false}}; diff --git a/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl new file mode 100644 index 0000000000000..e0c3aa54aaeba --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -finclude-default-header -emit-llvm -disable-llvm-passes \ +// RUN: -triple dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s + +// Test groupshared vector element store for uint. +// CHECK-LABEL: test_uint4 +// CHECK: [[VAL:%.*]] = load i32, ptr %Val.addr, align 4 +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[PTR:%.*]] = getelementptr <4 x i32>, ptr addrspace(3) @SMem, i32 0, i32 [[IDX]] +// CHECK: store i32 [[VAL]], ptr addrspace(3) [[PTR]], align 4 +// CHECK-: ret void +groupshared uint4 SMem; +void test_uint4(uint Idx, uint Val) { + SMem[Idx] = Val; +} + +// Test local vector element store for bool. +// CHECK: [[COND1:%.*]] = load i32, ptr addrspace(3) @Cond, align 4 +// CHECK: [[COND2:%.*]] = trunc i32 [[COND1]] to i1 +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[COND3:%.*]] = zext i1 [[COND2]] to i32 +// CHECK: [[PTR:%.*]] = getelementptr <3 x i32>, ptr %Val, i32 0, i32 [[IDX]] +// CHECK: store i32 [[COND3]], ptr [[PTR]], align 4 +// CHECK: ret +groupshared bool Cond; +bool3 test_bool(uint Idx) { + bool3 Val = { false, false, false}; + Val[Idx] = Cond; + return Val; +} + +// Test resource vector element store for float. +// CHECK: [[VAL:%.*]] = load float, ptr %Val.addr, align 4 +// CHECK: [[RES_PTR:%.*]] = call {{.*}} ptr @_ZN4hlsl18RWStructuredBufferIDv4_fEixEj(ptr {{.*}} @_ZL3Buf, i32 noundef 0) +// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4 +// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[RES_PTR]], i32 0, i32 [[IDX]] +// CHECK: store float [[VAL]], ptr [[PTR]], align 4 +// CHECK: ret void +RWStructuredBuffer<float4> Buf : register(u0); +void test_float(uint Idx, float Val) { + Buf[0][Idx] = Val; +} diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl b/clang/test/CodeGenHLSL/builtins/lit.hlsl index b7979960de9f6..364c2e8794ea2 100644 --- a/clang/test/CodeGenHLSL/builtins/lit.hlsl +++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl @@ -11,7 +11,8 @@ // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half [[LOG]], %{{.*}} // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half @llvm.exp.f16(half %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, half 0xH0000, half %{{.*}} -// CHECK: %vecins.i = insertelement <4 x half> %{{.*}}, half %hlsl.select7.i, i32 2 +// CHECK: [[PTR:%.*]] = getelementptr <4 x half>, ptr %Result.i, i32 0, i32 2 +// CHECK: store half %hlsl.select7.i, ptr [[PTR]], align 2 // CHECK: ret <4 x half> %{{.*}} half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M); } @@ -26,6 +27,7 @@ half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, M // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float [[LOG]], %{{.*}} // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float @llvm.exp.f32(float %mul.i) // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, float 0.000000e+00, float %{{.*}} -// CHECK: %vecins.i = insertelement <4 x float> %{{.*}}, float %hlsl.select7.i, i32 2 +// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr %Result.i, i32 0, i32 2 +// CHECK: store float %hlsl.select7.i, ptr [[PTR]], align 4 // CHECK: ret <4 x float> %{{.*}} float4 test_lit_float(float NDotL, float NDotH, float M) { return lit(NDotL, NDotH, M); } >From 7c81f23667cb1205779e8ff515450388a4637f7a Mon Sep 17 00:00:00 2001 From: Helena Kotas <[email protected]> Date: Mon, 24 Nov 2025 18:22:39 -0800 Subject: [PATCH 2/2] code review feedback - add assert and update comment --- clang/lib/CodeGen/CGExpr.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 763ce6369fc51..f04be5bfd9daf 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2576,16 +2576,18 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, if (!Dst.isSimple()) { if (Dst.isVectorElt()) { if (getLangOpts().HLSL) { - // In HLSL, storing to individual elements of a vector through - // VectorElt LValue needs to be handled as separate store. We need to - // avoid the load/modify/store sequence to prevent overwriting other - // elements that might be getting updated in parallel. + // HLSL allows direct access to vector elements, so storing to + // individual elements of a vector through VectorElt is handled as + // separate store instructions. Address DstAddr = Dst.getVectorAddress(); llvm::Type *DestAddrTy = DstAddr.getElementType(); llvm::Type *ElemTy = DestAddrTy->getScalarType(); CharUnits ElemAlign = CharUnits::fromQuantity( CGM.getDataLayout().getPrefTypeAlign(ElemTy)); + assert(ElemTy->getScalarSizeInBits() >= 8 && + "vector element type must be at least byte-sized"); + llvm::Value *Val = Src.getScalarVal(); if (Val->getType()->getPrimitiveSizeInBits() < ElemTy->getScalarSizeInBits()) _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
