https://github.com/s-perron updated https://github.com/llvm/llvm-project/pull/169393

>From 64becbc15cd5c5249480b689d14f4bcf8c489092 Mon Sep 17 00:00:00 2001
From: Steven Perron <[email protected]>
Date: Mon, 24 Nov 2025 09:59:42 -0500
Subject: [PATCH 1/2] [SPIRV] Use OpCopyMemory for logical SPIRV memcpy

This commit modifies the SPIR-V instruction selector to emit `OpCopyMemory`
instead of `OpCopyMemorySized` when generating code for the logical addressing
model.

Previously, `G_MEMCPY` was translated to `OpCopyMemorySized`, which requires an
explicit size operand. For logical SPIR-V, however, the copy size is already
implied by the pointee type, so the size operand is redundant. This change
emits `OpCopyMemory` instead, which is the appropriate instruction for the
logical addressing model and aligns with the SPIR-V specification.
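
For illustration, here is a minimal sketch of the kind of input this affects,
modeled on the new test added below (the triple, names, and struct layout are
illustrative only, not part of the change itself):

  target triple = "spirv-unknown-unknown"

  %struct.S = type <{ float, float, float, float, float }>
  @src = external dso_local addrspace(1) global %struct.S, align 4
  @dst = external dso_local addrspace(1) global %struct.S, align 4

  define void @main() {
  entry:
    ; The 20-byte size matches the store size of %struct.S, so under the
    ; logical addressing model this is now expected to select OpCopyMemory
    ; rather than OpCopyMemorySized with an explicit size operand.
    call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) align 4 @dst, ptr addrspace(1) align 4 @src, i64 20, i1 false)
    ret void
  }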
---
 .../Target/SPIRV/SPIRVInstructionSelector.cpp | 138 ++++++++++++------
 .../SPIRV/llvm-intrinsics/logical-memcpy.ll   |  32 ++++
 2 files changed, 126 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll

diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index d3fc08eb56cb3..89f23c25ac906 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -149,6 +149,9 @@ class SPIRVInstructionSelector : public InstructionSelector {
   bool selectStackRestore(MachineInstr &I) const;
 
   bool selectMemOperation(Register ResVReg, MachineInstr &I) const;
+  Register getOrCreateMemSetGlobal(MachineInstr &I) const;
+  bool selectCopyMemory(MachineInstr &I, Register SrcReg) const;
+  bool selectCopyMemorySized(MachineInstr &I, Register SrcReg) const;
 
   bool selectAtomicRMW(Register ResVReg, const SPIRVType *ResType,
                        MachineInstr &I, unsigned NewOpcode,
@@ -1435,50 +1438,76 @@ bool SPIRVInstructionSelector::selectStackRestore(MachineInstr &I) const {
       .constrainAllUses(TII, TRI, RBI);
 }
 
-bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
-                                                  MachineInstr &I) const {
+Register
+SPIRVInstructionSelector::getOrCreateMemSetGlobal(MachineInstr &I) const {
+  MachineIRBuilder MIRBuilder(I);
+  assert(I.getOperand(1).isReg() && I.getOperand(2).isReg());
+  unsigned Val = getIConstVal(I.getOperand(1).getReg(), MRI);
+  unsigned Num = getIConstVal(I.getOperand(2).getReg(), MRI);
+  Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext());
+  Type *ArrTy = ArrayType::get(ValTy, Num);
+  SPIRVType *VarTy = GR.getOrCreateSPIRVPointerType(
+      ArrTy, MIRBuilder, SPIRV::StorageClass::UniformConstant);
+
+  SPIRVType *SpvArrTy = GR.getOrCreateSPIRVType(
+      ArrTy, MIRBuilder, SPIRV::AccessQualifier::None, false);
+  Register Const = GR.getOrCreateConstIntArray(Val, Num, I, SpvArrTy, TII);
+  // TODO: check if we have such GV, add init, use buildGlobalVariable.
+  Function &CurFunction = GR.CurMF->getFunction();
+  Type *LLVMArrTy =
+      ArrayType::get(IntegerType::get(CurFunction.getContext(), 8), Num);
+  // Module takes ownership of the global var.
+  GlobalVariable *GV = new GlobalVariable(*CurFunction.getParent(), LLVMArrTy,
+                                          true, GlobalValue::InternalLinkage,
+                                          Constant::getNullValue(LLVMArrTy));
+  Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
+  auto MIBVar =
+      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable))
+          .addDef(VarReg)
+          .addUse(GR.getSPIRVTypeID(VarTy))
+          .addImm(SPIRV::StorageClass::UniformConstant)
+          .addUse(Const);
+  if (!MIBVar.constrainAllUses(TII, TRI, RBI))
+    return Register();
+
+  GR.add(GV, MIBVar);
+  GR.addGlobalObject(GV, GR.CurMF, VarReg);
+
+  buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {});
+  return VarReg;
+}
+
+bool SPIRVInstructionSelector::selectCopyMemory(MachineInstr &I,
+                                                Register SrcReg) const {
   MachineBasicBlock &BB = *I.getParent();
-  Register SrcReg = I.getOperand(1).getReg();
-  bool Result = true;
-  if (I.getOpcode() == TargetOpcode::G_MEMSET) {
+  Register DstReg = I.getOperand(0).getReg();
+  SPIRVType *DstTy = GR.getSPIRVTypeForVReg(DstReg);
+  SPIRVType *SrcTy = GR.getSPIRVTypeForVReg(SrcReg);
+  if (GR.getPointeeType(DstTy) != GR.getPointeeType(SrcTy))
+    report_fatal_error("OpCopyMemory requires operands to have the same type");
+  uint64_t CopySize = getIConstVal(I.getOperand(2).getReg(), MRI);
+  SPIRVType *PointeeTy = GR.getPointeeType(DstTy);
+  const Type *LLVMPointeeTy = GR.getTypeForSPIRVType(PointeeTy);
+  if (!LLVMPointeeTy)
+    report_fatal_error(
+        "Unable to determine pointee type size for OpCopyMemory");
+  const DataLayout &DL = I.getMF()->getFunction().getDataLayout();
+  if (CopySize != DL.getTypeStoreSize(const_cast<Type *>(LLVMPointeeTy)))
+    report_fatal_error(
+        "OpCopyMemory requires the size to match the pointee type size");
+  auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemory))
+                 .addUse(DstReg)
+                 .addUse(SrcReg);
+  if (I.getNumMemOperands()) {
     MachineIRBuilder MIRBuilder(I);
-    assert(I.getOperand(1).isReg() && I.getOperand(2).isReg());
-    unsigned Val = getIConstVal(I.getOperand(1).getReg(), MRI);
-    unsigned Num = getIConstVal(I.getOperand(2).getReg(), MRI);
-    Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext());
-    Type *ArrTy = ArrayType::get(ValTy, Num);
-    SPIRVType *VarTy = GR.getOrCreateSPIRVPointerType(
-        ArrTy, MIRBuilder, SPIRV::StorageClass::UniformConstant);
-
-    SPIRVType *SpvArrTy = GR.getOrCreateSPIRVType(
-        ArrTy, MIRBuilder, SPIRV::AccessQualifier::None, false);
-    Register Const = GR.getOrCreateConstIntArray(Val, Num, I, SpvArrTy, TII);
-    // TODO: check if we have such GV, add init, use buildGlobalVariable.
-    Function &CurFunction = GR.CurMF->getFunction();
-    Type *LLVMArrTy =
-        ArrayType::get(IntegerType::get(CurFunction.getContext(), 8), Num);
-    // Module takes ownership of the global var.
-    GlobalVariable *GV = new GlobalVariable(*CurFunction.getParent(), LLVMArrTy,
-                                            true, GlobalValue::InternalLinkage,
-                                            Constant::getNullValue(LLVMArrTy));
-    Register VarReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
-    auto MIBVar =
-        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable))
-            .addDef(VarReg)
-            .addUse(GR.getSPIRVTypeID(VarTy))
-            .addImm(SPIRV::StorageClass::UniformConstant)
-            .addUse(Const);
-    Result &= MIBVar.constrainAllUses(TII, TRI, RBI);
-
-    GR.add(GV, MIBVar);
-    GR.addGlobalObject(GV, GR.CurMF, VarReg);
-
-    buildOpDecorate(VarReg, I, TII, SPIRV::Decoration::Constant, {});
-    SPIRVType *SourceTy = GR.getOrCreateSPIRVPointerType(
-        ValTy, I, SPIRV::StorageClass::UniformConstant);
-    SrcReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
-    selectOpWithSrcs(SrcReg, SourceTy, I, {VarReg}, SPIRV::OpBitcast);
+    addMemoryOperands(*I.memoperands_begin(), MIB, MIRBuilder, GR);
   }
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectCopyMemorySized(MachineInstr &I,
+                                                     Register SrcReg) const {
+  MachineBasicBlock &BB = *I.getParent();
   auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpCopyMemorySized))
                  .addUse(I.getOperand(0).getReg())
                  .addUse(SrcReg)
@@ -1487,9 +1516,30 @@ bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
     MachineIRBuilder MIRBuilder(I);
     addMemoryOperands(*I.memoperands_begin(), MIB, MIRBuilder, GR);
   }
-  Result &= MIB.constrainAllUses(TII, TRI, RBI);
-  if (ResVReg.isValid() && ResVReg != MIB->getOperand(0).getReg())
-    Result &= BuildCOPY(ResVReg, MIB->getOperand(0).getReg(), I);
+  return MIB.constrainAllUses(TII, TRI, RBI);
+}
+
+bool SPIRVInstructionSelector::selectMemOperation(Register ResVReg,
+                                                  MachineInstr &I) const {
+  Register SrcReg = I.getOperand(1).getReg();
+  bool Result = true;
+  if (I.getOpcode() == TargetOpcode::G_MEMSET) {
+    Register VarReg = getOrCreateMemSetGlobal(I);
+    if (!VarReg.isValid())
+      return false;
+    Type *ValTy = Type::getInt8Ty(I.getMF()->getFunction().getContext());
+    SPIRVType *SourceTy = GR.getOrCreateSPIRVPointerType(
+        ValTy, I, SPIRV::StorageClass::UniformConstant);
+    SrcReg = MRI->createGenericVirtualRegister(LLT::scalar(64));
+    Result &= selectOpWithSrcs(SrcReg, SourceTy, I, {VarReg}, SPIRV::OpBitcast);
+  }
+  if (STI.isLogicalSPIRV()) {
+    Result &= selectCopyMemory(I, SrcReg);
+  } else {
+    Result &= selectCopyMemorySized(I, SrcReg);
+  }
+  if (ResVReg.isValid() && ResVReg != I.getOperand(0).getReg())
+    Result &= BuildCOPY(ResVReg, I.getOperand(0).getReg(), I);
   return Result;
 }
 
diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll
new file mode 100644
index 0000000000000..63eddd20bfc22
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/logical-memcpy.ll
@@ -0,0 +1,32 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpName %[[dst_var:[0-9]+]] "dst"
+; CHECK: OpName %[[src_var:[0-9]+]] "src"
+
+; CHECK: %[[f32:[0-9]+]] = OpTypeFloat 32
+; CHECK: %[[structS:[0-9]+]] = OpTypeStruct %[[f32]] %[[f32]] %[[f32]] %[[f32]] %[[f32]]
+; CHECK: %[[ptr_crosswkgrp_structS:[0-9]+]] = OpTypePointer CrossWorkgroup %[[structS]]
+%struct.S = type <{ float, float, float, float, float }>
+
+; CHECK-DAG: %[[src_var]] = OpVariable %[[ptr_crosswkgrp_structS]] CrossWorkgroup
+@src = external dso_local addrspace(1) global %struct.S, align 4
+
+; CHECK-DAG: %[[dst_var]] = OpVariable %[[ptr_crosswkgrp_structS]] CrossWorkgroup
+@dst = external dso_local addrspace(1) global %struct.S, align 4
+
+; CHECK: %[[main_func:[0-9]+]] = OpFunction %{{[0-9]+}} None %{{[0-9]+}}
+; CHECK: %[[entry:[0-9]+]] = OpLabel
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, inaccessiblemem: none, target_mem0: none, target_mem1: none)
+define void @main() local_unnamed_addr #0 {
+entry:
+; CHECK: OpCopyMemory %[[dst_var]] %[[src_var]] Aligned 4
+  call void @llvm.memcpy.p0.p0.i64(ptr addrspace(1) align 4 @dst, ptr addrspace(1) align 4 @src, i64 20, i1 false)
+  ret void
+; CHECK: OpReturn
+; CHECK: OpFunctionEnd
+}
+
+attributes #0 = { "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" }
+
+

>From 7e830b7a3e63d3dd9adbd7c6e47bb57ef9944ff2 Mon Sep 17 00:00:00 2001
From: Steven Perron <[email protected]>
Date: Mon, 24 Nov 2025 14:31:19 -0500
Subject: [PATCH 2/2] [SPIRV] Update logical SPIR-V data layout

The Vulkan data layout rules allow data to be aligned according to its scalar
layout. The data layout rules currently used by Clang do not allow that.

This updates those rules so that more compact data layouts can be represented
correctly.

Fixes https://github.com/llvm/llvm-project/issues/168401.
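
As a rough illustration of what the relaxed layout permits, consider a module
using the datalayout string this patch installs for the `spirv` triple (the
struct type below is hypothetical and only meant to show the vector-alignment
effect):

  target datalayout = "e-i1:32-i64:64-v16:8-v24:8-v32:16-v48:16-v64:32-v96:32-v128:32-v192:64-v256:64-n8:16:32:64-G10"
  target triple = "spirv-unknown-unknown"

  ; With the previous layout (v96:128), <3 x float> had 16-byte ABI alignment,
  ; so the vector field below was placed at offset 16. With v96:32 it only
  ; needs 4-byte (scalar) alignment and can sit at offset 4, matching the
  ; compact layouts allowed by Vulkan's scalarBlockLayout.
  %scalar.S = type { float, <3 x float> }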
---
 clang/lib/Basic/Targets/SPIR.h             | 29 +++++++++++++++++---
 llvm/lib/IR/DataLayout.cpp                 |  7 ++++-
 llvm/lib/TargetParser/TargetDataLayout.cpp | 32 +++++++++++++++++-----
 3 files changed, 56 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Basic/Targets/SPIR.h b/clang/lib/Basic/Targets/SPIR.h
index 332bf79e2babd..889a5976d948d 100644
--- a/clang/lib/Basic/Targets/SPIR.h
+++ b/clang/lib/Basic/Targets/SPIR.h
@@ -335,8 +335,28 @@ class LLVM_LIBRARY_VISIBILITY SPIRVTargetInfo : public BaseSPIRVTargetInfo {
 
     // SPIR-V IDs are represented with a single 32-bit word.
     SizeType = TargetInfo::UnsignedInt;
-    resetDataLayout("e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
-                    "v256:256-v512:512-v1024:1024-n8:16:32:64-G10");
+
+    // The data layout is intended to be the least restrictive layout allowed
+    // by the Vulkan specification. This is the standard buffer layout when the
+    // scalarBlockLayout feature is enabled. See
+    // https://docs.vulkan.org/spec/latest/chapters/interfaces.html#interfaces-resources-layout.
+
+    // Bools are not allowed in externally visible variables; they will be
+    // replaced with an i32 (i1:32). For vectors, we make assumptions about
+    // the element type for each vector size, favoring the most common
+    // element sizes. However, to get everything correct for HLSL, we will
+    // need to be able to specify the alignment more precisely.
+    // v16 -> 2 x 8-bits
+    // v24 -> 3 x 8-bits
+    // v32 -> 2 x 16-bits (ambiguous, could also be 4 x 8-bits)
+    // v48 -> 3 x 16-bits
+    // v64 -> 2 x 32-bits (ambiguous, could also be 4 x 16-bits)
+    // v96 -> 3 x 32-bits
+    // v128 -> 4 x 32-bits (ambiguous, could also be 2 x 64-bits)
+    // v192 -> 3 x 64-bits
+    // v256 -> 4 x 64-bits
+    resetDataLayout("e-i1:32-i64:64-v16:8-v24:8-v32:16-v48:16-v64:32-v96:32-"
+                    "v128:32-v192:64-v256:64-n8:16:32:64-G10");
   }
 
   void getTargetDefines(const LangOptions &Opts,
@@ -359,8 +379,9 @@ class LLVM_LIBRARY_VISIBILITY SPIRV32TargetInfo : public BaseSPIRVTargetInfo {
     // SPIR-V has core support for atomic ops, and Int32 is always available;
     // we take the maximum because it's possible the Host supports wider types.
     MaxAtomicInlineWidth = std::max<unsigned char>(MaxAtomicInlineWidth, 32);
-    resetDataLayout("e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-"
-                    "v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
+    resetDataLayout(
+        "e-p:32:32-i64:64-v16:16-v24:24-v32:32-v48:48-v96:96-v192:192-"
+        "v256:256-v512:512-v1024:1024-n8:16:32:64-G1");
   }
 
   void getTargetDefines(const LangOptions &Opts,
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
index 49e1f898ca594..456ebc92758fb 100644
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -311,9 +311,14 @@ static Error parseAlignment(StringRef Str, Align &Alignment, StringRef Name,
   }
 
   constexpr unsigned ByteWidth = 8;
-  if (Value % ByteWidth || !isPowerOf2_32(Value / ByteWidth))
+  if (Value % ByteWidth || !isPowerOf2_32(Value / ByteWidth)) {
+    llvm::dbgs() << "Value: " << Value << "\n";
+    llvm::dbgs() << "ByteWidth: " << ByteWidth << "\n";
+    llvm::dbgs() << "str: " << Str << "\n";
+    llvm::dbgs() << "Name: " << Name << "\n";
     return createStringError(
         Name + " alignment must be a power of two times the byte width");
+  }
 
   Alignment = Align(Value / ByteWidth);
   return Error::success();
diff --git a/llvm/lib/TargetParser/TargetDataLayout.cpp b/llvm/lib/TargetParser/TargetDataLayout.cpp
index d7359234b02f7..5df6ab421daaa 100644
--- a/llvm/lib/TargetParser/TargetDataLayout.cpp
+++ b/llvm/lib/TargetParser/TargetDataLayout.cpp
@@ -450,17 +450,35 @@ static std::string computeNVPTXDataLayout(const Triple &T, StringRef ABIName) {
 
 static std::string computeSPIRVDataLayout(const Triple &TT) {
   const auto Arch = TT.getArch();
-  // TODO: this probably needs to be revisited:
-  // Logical SPIR-V has no pointer size, so any fixed pointer size would be
-  // wrong. The choice to default to 32 or 64 is just motivated by another
-  // memory model used for graphics: PhysicalStorageBuffer64. But it shouldn't
-  // mean anything.
   if (Arch == Triple::spirv32)
     return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-"
            "v256:256-v512:512-v1024:1024-n8:16:32:64-G1";
+
+  // The data layout is intended to be the least restrictive layout allowed
+  // by the Vulkan specification. This is the standard buffer layout when the
+  // scalarBlockLayout feature is enabled. See
+  // https://docs.vulkan.org/spec/latest/chapters/interfaces.html#interfaces-resources-layout.
+
+  // Bools are not allowed in externally visible variables; they will be
+  // replaced with an i32 (i1:32). For vectors, we make assumptions about
+  // the element type for each vector size, favoring the most common
+  // element sizes. However, to get everything correct for HLSL, we will
+  // need to be able to specify the alignment more precisely.
+  // v16 -> 2 x 8-bits
+  // v24 -> 3 x 8-bits
+  // v32 -> 2 x 16-bits (ambiguous, could also be 4 x 8-bits)
+  // v48 -> 3 x 16-bits
+  // v64 -> 2 x 32-bits (ambiguous, could also be 4 x 16-bits)
+  // v96 -> 3 x 32-bits
+  // v128 -> 4 x 32-bits (ambiguous, could also be 2 x 64-bits)
+  // v192 -> 3 x 64-bits
+  // v256 -> 4 x 64-bits
+
+  // The choice of pointer size is meant to align with PhysicalStorageBuffer64
+  // in case those pointers are exposed as physical pointers.
   if (Arch == Triple::spirv)
-    return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
-           "v512:512-v1024:1024-n8:16:32:64-G10";
+    return ("e-i1:32-i64:64-v16:8-v24:8-v32:16-v48:16-v64:32-v96:32-v128:32-"
+            "v192:64-v256:64-n8:16:32:64-G10");
   if (TT.getVendor() == Triple::VendorType::AMD &&
       TT.getOS() == Triple::OSType::AMDHSA)
     return "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-"
