https://github.com/farzonl updated 
https://github.com/llvm/llvm-project/pull/142475

>From 244b01e43f4c974c682c90d1315c59605da2b289 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlo...@microsoft.com>
Date: Mon, 2 Jun 2025 15:40:22 -0400
Subject: [PATCH 1/5] [DirectX] add GEP i8 legalization The i8 legalization
 code in DXILLegalizePass's `fixI8UseChain` needs to be updated to check for
 i8 geps. It seems like there are i8 GEPs being left around after we remove
 all the other i8 instructions and this is causing problem on validation.

Since this is cleaning up a missed GEP The approach is to assume the
getPointerOperand is to an alloca we further will check if this is
an array alloca then do some byte offset arithmetic to figure out the
memory index to use. Finally we will emit the new gep and cleanup the
old one.

Finally needed to update upcastI8AllocasAndUses to account for loads off
of GEPs instead of just loads from the alloca.
---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 58 +++++++++++++++++---
 llvm/test/CodeGen/DirectX/legalize-i8.ll     | 54 ++++++++++++++++++
 2 files changed, 104 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp 
b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 23883c936a20d..f6a80ebfc8435 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -95,6 +95,8 @@ static void fixI8UseChain(Instruction &I,
     Type *ElementType = NewOperands[0]->getType();
     if (auto *AI = dyn_cast<AllocaInst>(NewOperands[0]))
       ElementType = AI->getAllocatedType();
+    if (auto *GEP = dyn_cast<GetElementPtrInst>(NewOperands[0]))
+      ElementType = GEP->getSourceElementType();
     LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewOperands[0]);
     ReplacedValues[Load] = NewLoad;
     ToRemove.push_back(Load);
@@ -164,6 +166,36 @@ static void fixI8UseChain(Instruction &I,
     if (AdjustedCast)
       Cast->replaceAllUsesWith(AdjustedCast);
   }
+  if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+    if (!GEP->getType()->isPointerTy() ||
+        !GEP->getSourceElementType()->isIntegerTy(8))
+      return;
+
+    Value *BasePtr = GEP->getPointerOperand();
+    if (ReplacedValues.count(BasePtr))
+      BasePtr = ReplacedValues[BasePtr];
+
+    Type *ElementType = BasePtr->getType();
+    if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
+      ElementType = AI->getAllocatedType();
+    if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
+      ElementType = ArrTy->getArrayElementType();
+
+    ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    // Note: the only way to convert an i8 offset to an i32 offset without
+    // emitting code Would be to emit code. We sould expect this value to be a
+    // ConstantInt since Offsets are very regulalrly converted.
+    assert(Offset && "Offset is expected to be a ConstantInt");
+    uint32_t ByteOffset = Offset->getZExtValue();
+    uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
+    assert(ElemSize > 0 && "ElementSize must be set");
+    uint32_t Index = ByteOffset / ElemSize;
+    Value *NewGEP =
+        Builder.CreateGEP(ElementType, BasePtr, Builder.getInt32(Index),
+                          GEP->getName(), GEP->getNoWrapFlags());
+    ReplacedValues[GEP] = NewGEP;
+    ToRemove.push_back(GEP);
+  }
 }
 
 static void upcastI8AllocasAndUses(Instruction &I,
@@ -175,15 +207,12 @@ static void upcastI8AllocasAndUses(Instruction &I,
 
   Type *SmallestType = nullptr;
 
-  for (User *U : AI->users()) {
-    auto *Load = dyn_cast<LoadInst>(U);
-    if (!Load)
-      continue;
+  auto ProcessLoad = [&](LoadInst *Load) {
     for (User *LU : Load->users()) {
       Type *Ty = nullptr;
-      if (auto *Cast = dyn_cast<CastInst>(LU))
+      if (auto *Cast = dyn_cast<CastInst>(LU)) {
         Ty = Cast->getType();
-      if (CallInst *CI = dyn_cast<CallInst>(LU)) {
+      } else if (auto *CI = dyn_cast<CallInst>(LU)) {
         if (CI->getIntrinsicID() == Intrinsic::memset)
           Ty = Type::getInt32Ty(CI->getContext());
       }
@@ -191,9 +220,22 @@ static void upcastI8AllocasAndUses(Instruction &I,
       if (!Ty)
         continue;
 
-      if (!SmallestType ||
-          Ty->getPrimitiveSizeInBits() < 
SmallestType->getPrimitiveSizeInBits())
+      if (!SmallestType || Ty->getPrimitiveSizeInBits() <
+                               SmallestType->getPrimitiveSizeInBits()) {
         SmallestType = Ty;
+      }
+    }
+  };
+
+  for (User *U : AI->users()) {
+    if (auto *Load = dyn_cast<LoadInst>(U))
+      ProcessLoad(Load);
+    else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
+      for (User *GU : GEP->users()) {
+        if (auto *Load = dyn_cast<LoadInst>(GU)) {
+          ProcessLoad(Load);
+        }
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll 
b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index 2602be778cd86..d1d76ccf5c76c 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -106,3 +106,57 @@ define i32 @all_imm() {
   %2 = sext i8 %1 to i32
   ret i32 %2
 }
+
+define i32 @scalar_i8_geps() {
+  ; CHECK-LABEL: define i32 @scalar_i8_geps(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr 
[[ALLOCA]], i32 0
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+    %1 = alloca i8, align 4
+    %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+    %3 = load i8, ptr %2
+    %4 = sext i8 %3 to i32
+    ret i32 %4
+}
+
+define i32 @i8_geps_index0() {
+  ; CHECK-LABEL: define i32 @i8_geps_index0(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr 
[[ALLOCA]], i32 0
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 0
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_geps_index1() {
+  ; CHECK-LABEL: define i32 @i8_geps_index1(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr 
[[ALLOCA]], i32 1
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]]
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}
+
+define i32 @i8_gep_store() {
+  ; CHECK-LABEL: define i32 @i8_gep_store(
+  ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x i32], align 8
+  ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds nuw i32, ptr 
[[ALLOCA]], i32 1
+  ; CHECK-NEXT:    store i32 1, ptr [[GEP]], align 4
+  ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, ptr [[GEP]]
+  ; CHECK-NEXT:    ret i32 [[LOAD]]
+  %1 = alloca [2 x i32], align 8
+  %2 = getelementptr inbounds nuw i8, ptr %1, i32 4
+  store i8 1, ptr %2
+  %3 = load i8, ptr %2
+  %4 = sext i8 %3 to i32
+  ret i32 %4
+}

>From 19668a1619396f3ea7447df4644b362af870c982 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlo...@microsoft.com>
Date: Mon, 2 Jun 2025 17:49:04 -0400
Subject: [PATCH 2/5] fix comment up, fix up some style guide reuquired changes
 called out in pr comment

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp 
b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index f6a80ebfc8435..d733187f2bae1 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -182,9 +182,10 @@ static void fixI8UseChain(Instruction &I,
       ElementType = ArrTy->getArrayElementType();
 
     ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
-    // Note: the only way to convert an i8 offset to an i32 offset without
-    // emitting code Would be to emit code. We sould expect this value to be a
-    // ConstantInt since Offsets are very regulalrly converted.
+    // Note: i8 to i32 offset conversion without emitting IR requires constant
+    // ints. Since offset conversion is common, we can safely assume Offset is
+    // always a ConstantInt, so no need to have a conditional bail out on
+    // nullptr, instead assert this is the case.
     assert(Offset && "Offset is expected to be a ConstantInt");
     uint32_t ByteOffset = Offset->getZExtValue();
     uint32_t ElemSize = GEP->getDataLayout().getTypeAllocSize(ElementType);
@@ -210,9 +211,9 @@ static void upcastI8AllocasAndUses(Instruction &I,
   auto ProcessLoad = [&](LoadInst *Load) {
     for (User *LU : Load->users()) {
       Type *Ty = nullptr;
-      if (auto *Cast = dyn_cast<CastInst>(LU)) {
+      if (CastInst *Cast = dyn_cast<CastInst>(LU))
         Ty = Cast->getType();
-      } else if (auto *CI = dyn_cast<CallInst>(LU)) {
+      else if (CallInst *CI = dyn_cast<CallInst>(LU)) {
         if (CI->getIntrinsicID() == Intrinsic::memset)
           Ty = Type::getInt32Ty(CI->getContext());
       }
@@ -220,10 +221,9 @@ static void upcastI8AllocasAndUses(Instruction &I,
       if (!Ty)
         continue;
 
-      if (!SmallestType || Ty->getPrimitiveSizeInBits() <
-                               SmallestType->getPrimitiveSizeInBits()) {
+      if (!SmallestType ||
+          Ty->getPrimitiveSizeInBits() < 
SmallestType->getPrimitiveSizeInBits())
         SmallestType = Ty;
-      }
     }
   };
 
@@ -232,9 +232,8 @@ static void upcastI8AllocasAndUses(Instruction &I,
       ProcessLoad(Load);
     else if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
       for (User *GU : GEP->users()) {
-        if (auto *Load = dyn_cast<LoadInst>(GU)) {
+        if (auto *Load = dyn_cast<LoadInst>(GU))
           ProcessLoad(Load);
-        }
       }
     }
   }

>From 6f4702a6ecdf9b2ba53497d72185f760f25f546f Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlo...@microsoft.com>
Date: Tue, 3 Jun 2025 14:04:29 -0400
Subject: [PATCH 3/5] add replaceAllUsesWith back to update old gep to new gep

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp 
b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index d733187f2bae1..23f67cfe9fdda 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -195,6 +195,7 @@ static void fixI8UseChain(Instruction &I,
         Builder.CreateGEP(ElementType, BasePtr, Builder.getInt32(Index),
                           GEP->getName(), GEP->getNoWrapFlags());
     ReplacedValues[GEP] = NewGEP;
+    GEP->replaceAllUsesWith(NewGEP);
     ToRemove.push_back(GEP);
   }
 }

>From 72228f5bf29402d3ba77b50986052638a96625c2 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlo...@microsoft.com>
Date: Tue, 3 Jun 2025 17:10:31 -0400
Subject: [PATCH 4/5] Add support for GEPs with global ptr operands. Add
 support for loads with constexpr GEPs

---
 llvm/lib/Target/DirectX/DXILLegalizePass.cpp | 32 ++++++++++++++++++--
 llvm/test/CodeGen/DirectX/legalize-i8.ll     | 16 ++++++++++
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp 
b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
index 23f67cfe9fdda..ba44bfbc27a8d 100644
--- a/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
+++ b/llvm/lib/Target/DirectX/DXILLegalizePass.cpp
@@ -87,9 +87,8 @@ static void fixI8UseChain(Instruction &I,
     return;
   }
 
-  if (auto *Load = dyn_cast<LoadInst>(&I)) {
-    if (!I.getType()->isIntegerTy(8))
-      return;
+  if (auto *Load = dyn_cast<LoadInst>(&I);
+      Load && I.getType()->isIntegerTy(8)) {
     SmallVector<Value *> NewOperands;
     ProcessOperands(NewOperands);
     Type *ElementType = NewOperands[0]->getType();
@@ -103,6 +102,31 @@ static void fixI8UseChain(Instruction &I,
     return;
   }
 
+  if (auto *Load = dyn_cast<LoadInst>(&I);
+      Load && isa<ConstantExpr>(Load->getPointerOperand())) {
+    auto *CE = dyn_cast<ConstantExpr>(Load->getPointerOperand());
+    if (!(CE->getOpcode() == Instruction::GetElementPtr))
+      return;
+    auto *GEP = dyn_cast<GEPOperator>(CE);
+    if (!GEP->getSourceElementType()->isIntegerTy(8))
+      return;
+
+    Type *ElementType = Load->getType();
+    ConstantInt *Offset = dyn_cast<ConstantInt>(GEP->getOperand(1));
+    uint32_t ByteOffset = Offset->getZExtValue();
+    uint32_t ElemSize = Load->getDataLayout().getTypeAllocSize(ElementType);
+    uint32_t Index = ByteOffset / ElemSize;
+    Value *NewGEP = Builder.CreateGEP(ElementType, GEP->getPointerOperand(),
+                                      Builder.getInt32(Index), GEP->getName(),
+                                      GEP->getNoWrapFlags());
+
+    LoadInst *NewLoad = Builder.CreateLoad(ElementType, NewGEP);
+    ReplacedValues[Load] = NewLoad;
+    Load->replaceAllUsesWith(NewLoad);
+    ToRemove.push_back(Load);
+    return;
+  }
+
   if (auto *BO = dyn_cast<BinaryOperator>(&I)) {
     if (!I.getType()->isIntegerTy(8))
       return;
@@ -178,6 +202,8 @@ static void fixI8UseChain(Instruction &I,
     Type *ElementType = BasePtr->getType();
     if (auto *AI = dyn_cast<AllocaInst>(BasePtr))
       ElementType = AI->getAllocatedType();
+    if (auto *GV = dyn_cast<GlobalVariable>(BasePtr))
+      ElementType = GV->getValueType();
     if (auto *ArrTy = dyn_cast<ArrayType>(ElementType))
       ElementType = ArrTy->getArrayElementType();
 
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll 
b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index d1d76ccf5c76c..aedd3b5a79493 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -160,3 +160,19 @@ define i32 @i8_gep_store() {
   %4 = sext i8 %3 to i32
   ret i32 %4
 }
+
+@g = local_unnamed_addr addrspace(3) global [2 x float] zeroinitializer, align 
4
+define float @i8_gep_global_index() {
+  ; CHECK-LABEL: define i32 @i8_gep_global_index(
+  ; CHECK-NEXT: %2 = load float, ptr addrspace(3) getelementptr inbounds nuw 
(float, ptr addrspace(3) @g, i32 1), align 4
+  %1 = getelementptr inbounds nuw i8, ptr addrspace(3) @g, i32 4
+  %2 = load float, ptr addrspace(3) %1, align 4
+  ret float %2
+}
+
+define float @i8_gep_global_constexpr() {
+  ; CHECK-LABEL: define i32 @i8_gep_global_constexpr(
+  ; CHECK-NEXT: %2 = load float, ptr addrspace(3) getelementptr inbounds nuw 
(float, ptr addrspace(3) @g, i32 1), align 4
+  %1 = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr 
addrspace(3) @g, i32 4), align 4
+  ret float %1
+}

>From 221c68512ada7db14b675db81a100fecc0755769 Mon Sep 17 00:00:00 2001
From: Farzon Lotfi <farzonlo...@microsoft.com>
Date: Tue, 3 Jun 2025 17:29:55 -0400
Subject: [PATCH 5/5] mend

---
 clang/lib/CodeGen/CGHLSLRuntime.cpp      |  4 ++--
 llvm/test/CodeGen/DirectX/legalize-i8.ll | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index cfe9dc1192d9d..d525d371e4276 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -463,11 +463,11 @@ void CGHLSLRuntime::emitEntryFunction(const FunctionDecl 
*FD,
   B.CreateRetVoid();
 
   // Add and identify root signature to function, if applicable
-  for (const Attr *Attr : FD->getAttrs()) {
+  /*for (const Attr *Attr : FD->getAttrs()) {
     if (const auto *RSAttr = dyn_cast<RootSignatureAttr>(Attr))
       addRootSignature(RSAttr->getSignatureDecl()->getRootElements(), EntryFn,
                        M);
-  }
+  }*/
 }
 
 void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
diff --git a/llvm/test/CodeGen/DirectX/legalize-i8.ll 
b/llvm/test/CodeGen/DirectX/legalize-i8.ll
index aedd3b5a79493..97336222896e6 100644
--- a/llvm/test/CodeGen/DirectX/legalize-i8.ll
+++ b/llvm/test/CodeGen/DirectX/legalize-i8.ll
@@ -163,16 +163,18 @@ define i32 @i8_gep_store() {
 
 @g = local_unnamed_addr addrspace(3) global [2 x float] zeroinitializer, align 
4
 define float @i8_gep_global_index() {
-  ; CHECK-LABEL: define i32 @i8_gep_global_index(
-  ; CHECK-NEXT: %2 = load float, ptr addrspace(3) getelementptr inbounds nuw 
(float, ptr addrspace(3) @g, i32 1), align 4
+  ; CHECK-LABEL: define float @i8_gep_global_index(
+  ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr 
inbounds nuw (float, ptr addrspace(3) @g, i32 1), align 4
+  ; CHECK-NEXT:    ret float [[LOAD]]
   %1 = getelementptr inbounds nuw i8, ptr addrspace(3) @g, i32 4
   %2 = load float, ptr addrspace(3) %1, align 4
   ret float %2
 }
 
 define float @i8_gep_global_constexpr() {
-  ; CHECK-LABEL: define i32 @i8_gep_global_constexpr(
-  ; CHECK-NEXT: %2 = load float, ptr addrspace(3) getelementptr inbounds nuw 
(float, ptr addrspace(3) @g, i32 1), align 4
+  ; CHECK-LABEL: define float @i8_gep_global_constexpr(
+  ; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr addrspace(3) getelementptr 
inbounds nuw (float, ptr addrspace(3) @g, i32 1), align 4
+  ; CHECK-NEXT: ret float [[LOAD]]
   %1 = load float, ptr addrspace(3) getelementptr inbounds nuw (i8, ptr 
addrspace(3) @g, i32 4), align 4
   ret float %1
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to