https://github.com/hekota updated 
https://github.com/llvm/llvm-project/pull/169144

>From 7ffd077dc7132f1f9b10cee36fcc61d7b24a7dd3 Mon Sep 17 00:00:00 2001
From: Helena Kotas <[email protected]>
Date: Fri, 21 Nov 2025 19:00:47 -0800
Subject: [PATCH 1/2] [HLSL] Update indexed vector elements individually

Storing to individual elements of a vector through vector indexing
needs to be handled as separate stores. We need to avoid the load/modify/store
of the whole vector to prevent overwriting other vector elements that might be
getting updated in parallel.

Fixes #160208
Fixes #167729
---
 clang/lib/CodeGen/CGExpr.cpp                  | 24 +++++++++++
 clang/test/CodeGenHLSL/BoolVector.hlsl        | 15 +++----
 .../builtins/VectorElementStore.hlsl          | 41 +++++++++++++++++++
 clang/test/CodeGenHLSL/builtins/lit.hlsl      |  6 ++-
 4 files changed, 75 insertions(+), 11 deletions(-)
 create mode 100644 clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index f2451b16e78be..763ce6369fc51 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2575,6 +2575,30 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, 
LValue Dst,
                                              bool isInit) {
   if (!Dst.isSimple()) {
     if (Dst.isVectorElt()) {
+      if (getLangOpts().HLSL) {
+        // In HLSL, storing to individual elements of a vector through
+        // VectorElt LValue needs to be handled as separate store. We need to
+        // avoid the load/modify/store sequence to prevent overwriting other
+        // elements that might be getting updated in parallel.
+        Address DstAddr = Dst.getVectorAddress();
+        llvm::Type *DestAddrTy = DstAddr.getElementType();
+        llvm::Type *ElemTy = DestAddrTy->getScalarType();
+        CharUnits ElemAlign = CharUnits::fromQuantity(
+            CGM.getDataLayout().getPrefTypeAlign(ElemTy));
+
+        llvm::Value *Val = Src.getScalarVal();
+        if (Val->getType()->getPrimitiveSizeInBits() <
+            ElemTy->getScalarSizeInBits())
+          Val = Builder.CreateZExt(Val, ElemTy->getScalarType());
+
+        llvm::Value *Idx = Dst.getVectorIdx();
+        llvm::Value *Zero = llvm::ConstantInt::get(Int32Ty, 0);
+        Address DstElemAddr =
+            Builder.CreateGEP(DstAddr, {Zero, Idx}, DestAddrTy, ElemAlign);
+        Builder.CreateStore(Val, DstElemAddr, Dst.isVolatileQualified());
+        return;
+      }
+
       // Read/modify/write the vector, inserting the new element.
       llvm::Value *Vec = Builder.CreateLoad(Dst.getVectorAddress(),
                                             Dst.isVolatileQualified());
diff --git a/clang/test/CodeGenHLSL/BoolVector.hlsl 
b/clang/test/CodeGenHLSL/BoolVector.hlsl
index d5054a5a92b5d..6be90e8f51ce2 100644
--- a/clang/test/CodeGenHLSL/BoolVector.hlsl
+++ b/clang/test/CodeGenHLSL/BoolVector.hlsl
@@ -69,9 +69,8 @@ bool fn4() {
 // CHECK-LABEL: define hidden void {{.*}}fn5{{.*}}
 // CHECK: [[Arr:%.*]] = alloca <2 x i32>, align 8
 // CHECK-NEXT: store <2 x i32> splat (i32 1), ptr [[Arr]], align 8
-// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[Arr]], align 8
-// CHECK-NEXT: [[V:%.*]] = insertelement <2 x i32> [[L]], i32 0, i32 1
-// CHECK-NEXT: store <2 x i32> [[V]], ptr [[Arr]], align 8
+// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[Arr]]
+// CHECK-NEXT: store i32 0, ptr [[Ptr]], align 4
 // CHECK-NEXT: ret void
 void fn5() {
   bool2 Arr = {true,true};
@@ -86,10 +85,9 @@ void fn5() {
 // CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[V]], align 4
 // CHECK-NEXT: [[LV:%.*]] = trunc i32 [[Y]] to i1
 // CHECK-NEXT: [[BV:%.*]] = getelementptr inbounds nuw %struct.S, ptr [[S]], 
i32 0, i32 0
-// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[BV]], align 1
 // CHECK-NEXT: [[Z:%.*]] = zext i1 [[LV]] to i32
-// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 [[Z]], i32 1
-// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[BV]], align 1
+// CHECK-NEXT: [[Ptr:%.*]] = getelementptr <2 x i32>, ptr [[BV]], i32 0, i32 1
+// CHECK-NEXT: store i32 [[Z]], ptr [[Ptr]], align 4
 // CHECK-NEXT: ret void
 void fn6() {
   bool V = false;
@@ -101,9 +99,8 @@ void fn6() {
 // CHECK: [[Arr:%.*]] = alloca [2 x <2 x i32>], align 8
 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Arr]], ptr align 
8 {{.*}}, i32 16, i1 false)
 // CHECK-NEXT: [[Idx:%.*]] = getelementptr inbounds [2 x <2 x i32>], ptr 
[[Arr]], i32 0, i32 0
-// CHECK-NEXT: [[X:%.*]] = load <2 x i32>, ptr [[Idx]], align 8
-// CHECK-NEXT: [[VI:%.*]] = insertelement <2 x i32> [[X]], i32 0, i32 1
-// CHECK-NEXT: store <2 x i32> [[VI]], ptr [[Idx]], align 8
+// CHECK-NEXT: %[[Ptr:.*]] = getelementptr <2 x i32>, ptr [[Idx]], i32 0, i32 1
+// CHECK-NEXT: store i32 0, ptr %[[Ptr]], align 4
 // CHECK-NEXT: ret void
 void fn7() {
   bool2 Arr[2] = {{true,true}, {false,false}};
diff --git a/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl 
b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl
new file mode 100644
index 0000000000000..e0c3aa54aaeba
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/VectorElementStore.hlsl
@@ -0,0 +1,41 @@
+// RUN: %clang_cc1 -finclude-default-header -emit-llvm -disable-llvm-passes  \
+// RUN:   -triple dxil-pc-shadermodel6.3-library %s -o - | FileCheck %s
+
+// Test groupshared vector element store for uint.
+// CHECK-LABEL: test_uint4
+// CHECK: [[VAL:%.*]] = load i32, ptr %Val.addr, align 4
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[PTR:%.*]] = getelementptr <4 x i32>, ptr addrspace(3) @SMem, i32 
0, i32 [[IDX]]
+// CHECK: store i32 [[VAL]], ptr addrspace(3) [[PTR]], align 4
+// CHECK-: ret void
+groupshared uint4 SMem;
+void test_uint4(uint Idx, uint Val) {
+  SMem[Idx] = Val;
+}
+
+// Test local vector element store for bool.
+// CHECK: [[COND1:%.*]] = load i32, ptr addrspace(3) @Cond, align 4
+// CHECK: [[COND2:%.*]] = trunc i32 [[COND1]] to i1
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[COND3:%.*]] = zext i1 [[COND2]] to i32
+// CHECK: [[PTR:%.*]] = getelementptr <3 x i32>, ptr %Val, i32 0, i32 [[IDX]]
+// CHECK: store i32 [[COND3]], ptr [[PTR]], align 4
+// CHECK: ret
+groupshared bool Cond;
+bool3 test_bool(uint Idx) {
+  bool3 Val = { false, false, false};
+  Val[Idx] = Cond;
+  return Val;
+}
+
+// Test resource vector element store for float.
+// CHECK: [[VAL:%.*]] = load float, ptr %Val.addr, align 4
+// CHECK: [[RES_PTR:%.*]] = call {{.*}} ptr 
@_ZN4hlsl18RWStructuredBufferIDv4_fEixEj(ptr {{.*}} @_ZL3Buf, i32 noundef 0)
+// CHECK: [[IDX:%.*]] = load i32, ptr %Idx.addr, align 4
+// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr [[RES_PTR]], i32 0, i32 
[[IDX]]
+// CHECK: store float [[VAL]], ptr [[PTR]], align 4
+// CHECK: ret void
+RWStructuredBuffer<float4> Buf : register(u0);
+void test_float(uint Idx, float Val) {
+  Buf[0][Idx] = Val;
+}
diff --git a/clang/test/CodeGenHLSL/builtins/lit.hlsl 
b/clang/test/CodeGenHLSL/builtins/lit.hlsl
index b7979960de9f6..364c2e8794ea2 100644
--- a/clang/test/CodeGenHLSL/builtins/lit.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/lit.hlsl
@@ -11,7 +11,8 @@
 // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn half [[LOG]], %{{.*}}
 // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn half 
@llvm.exp.f16(half %mul.i)
 // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, 
half 0xH0000, half %{{.*}}
-// CHECK: %vecins.i = insertelement <4 x half> %{{.*}}, half %hlsl.select7.i, 
i32 2
+// CHECK: [[PTR:%.*]] = getelementptr <4 x half>, ptr %Result.i, i32 0, i32 2
+// CHECK: store half %hlsl.select7.i, ptr [[PTR]], align 2
 // CHECK: ret <4 x half> %{{.*}}
 half4 test_lit_half(half NDotL, half NDotH, half M) { return lit(NDotL, NDotH, 
M); }
 
@@ -26,6 +27,7 @@ half4 test_lit_half(half NDotL, half NDotH, half M) { return 
lit(NDotL, NDotH, M
 // CHECK: %mul.i = fmul reassoc nnan ninf nsz arcp afn float [[LOG]], %{{.*}}
 // CHECK: [[EXP:%.*]] = call reassoc nnan ninf nsz arcp afn float 
@llvm.exp.f32(float %mul.i)
 // CHECK: %hlsl.select7.i = select reassoc nnan ninf nsz arcp afn i1 %{{.*}}, 
float 0.000000e+00, float %{{.*}}
-// CHECK: %vecins.i = insertelement <4 x float> %{{.*}}, float 
%hlsl.select7.i, i32 2
+// CHECK: [[PTR:%.*]] = getelementptr <4 x float>, ptr %Result.i, i32 0, i32 2
+// CHECK: store float %hlsl.select7.i, ptr [[PTR]], align 4
 // CHECK: ret <4 x float> %{{.*}}
 float4 test_lit_float(float NDotL, float NDotH, float M) { return lit(NDotL, 
NDotH, M); }

>From 7c81f23667cb1205779e8ff515450388a4637f7a Mon Sep 17 00:00:00 2001
From: Helena Kotas <[email protected]>
Date: Mon, 24 Nov 2025 18:22:39 -0800
Subject: [PATCH 2/2] code review feedback - add assert and update comment

---
 clang/lib/CodeGen/CGExpr.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 763ce6369fc51..f04be5bfd9daf 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2576,16 +2576,18 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue 
Src, LValue Dst,
   if (!Dst.isSimple()) {
     if (Dst.isVectorElt()) {
       if (getLangOpts().HLSL) {
-        // In HLSL, storing to individual elements of a vector through
-        // VectorElt LValue needs to be handled as separate store. We need to
-        // avoid the load/modify/store sequence to prevent overwriting other
-        // elements that might be getting updated in parallel.
+        // HLSL allows direct access to vector elements, so storing to
+        // individual elements of a vector through VectorElt is handled as
+        // separate store instructions.
         Address DstAddr = Dst.getVectorAddress();
         llvm::Type *DestAddrTy = DstAddr.getElementType();
         llvm::Type *ElemTy = DestAddrTy->getScalarType();
         CharUnits ElemAlign = CharUnits::fromQuantity(
             CGM.getDataLayout().getPrefTypeAlign(ElemTy));
 
+        assert(ElemTy->getScalarSizeInBits() >= 8 &&
+               "vector element type must be at least byte-sized");
+
         llvm::Value *Val = Src.getScalarVal();
         if (Val->getType()->getPrimitiveSizeInBits() <
             ElemTy->getScalarSizeInBits())

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to