[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
@@ -4308,26 +4398,33 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI, "reference."); #endif - auto Assign = [&MFI](int FI, int64_t Offset) { + auto StackForObject = [&](int FI, uint64_t &ZPRStackTop, +uint64_t &PPRStackTop) -> uint64_t & { +return MFI.getStackID(FI) == TargetStackID::ScalableVector ? ZPRStackTop + : PPRStackTop; + }; + + auto Assign = [&MFI, AssignOffsets](int FI, int64_t Offset) { +if (AssignOffsets == AssignObjectOffsets::No) + return; LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n"); MFI.setObjectOffset(FI, Offset); }; - int64_t Offset = 0; - // Then process all callee saved slots. + int MinCSFrameIndex, MaxCSFrameIndex; if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) { -// Assign offsets to the callee save slots. -for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) { - Offset += MFI.getObjectSize(I); - Offset = alignTo(Offset, MFI.getObjectAlign(I)); - if (AssignOffsets) -Assign(I, -Offset); +for (int FI = MinCSFrameIndex; FI <= MaxCSFrameIndex; ++FI) { + uint64_t &StackTop = StackForObject(FI, ZPRStackTop, PPRStackTop); + StackTop += MFI.getObjectSize(FI); + StackTop = alignTo(StackTop, MFI.getObjectAlign(FI)); + Assign(FI, -int64_t(StackTop)); MacDue wrote: I've added the assert and moved common logic into `AllocateObject` (what used to be `Assign`). https://github.com/llvm/llvm-project/pull/142391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Improve error accumulation in root signature parsing (PR #144465)
@@ -699,19 +736,20 @@ static bool verifyBorderColor(uint32_t BorderColor) { static bool verifyLOD(float LOD) { return !std::isnan(LOD); } static bool validate(LLVMContext *Ctx, const mcdxbc::RootSignatureDesc &RSD) { - + bool HasError = false; if (!verifyVersion(RSD.Version)) { -return reportValueError(Ctx, "Version", RSD.Version); +HasError = reportValueError(Ctx, "Version", RSD.Version) || HasError; llvm-beanz wrote: This pattern seems really awkward because the report functions always return `true`. That means that the `||` is always unnecessary, but also the assignment itself isn't really as clear to understand as it could be. You could instead write this code as something like: ``` if (!verifyVersion(RSD.Version)) { reportValueError(...); HasError = true; } ``` This is more verbose but a lot easier to see what is happening. https://github.com/llvm/llvm-project/pull/144465 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)
@@ -48,6 +48,71 @@ static bool reportValueError(LLVMContext *Ctx, Twine ParamName, return true; } +// Template function to get formatted type string based on C++ type +template std::string getTypeFormatted() { llvm-beanz wrote: Looks like this could be a StringRef. ```suggestion template StringRef getTypeFormatted() { ``` https://github.com/llvm/llvm-project/pull/144577 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj ready_for_review https://github.com/llvm/llvm-project/pull/148241 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)
@@ -48,6 +48,71 @@ static bool reportValueError(LLVMContext *Ctx, Twine ParamName, return true; } +// Template function to get formatted type string based on C++ type +template std::string getTypeFormatted() { + if constexpr (std::is_same_v) { +return "string"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "metadata"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "constant"; + } else if constexpr (std::is_same_v) { +return "constant"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "constant int"; + } else if constexpr (std::is_same_v) { +return "constant int"; + } + return "unknown"; +} + +// Helper function to get the actual type of a metadata operand +std::string getActualMDType(const MDNode *Node, unsigned Index) { llvm-beanz wrote: ```suggestion StringRef getActualMDType(const MDNode *Node, unsigned Index) { ``` https://github.com/llvm/llvm-project/pull/144577 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)
@@ -48,6 +48,71 @@ static bool reportValueError(LLVMContext *Ctx, Twine ParamName, return true; } +// Template function to get formatted type string based on C++ type +template std::string getTypeFormatted() { + if constexpr (std::is_same_v) { +return "string"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "metadata"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "constant"; + } else if constexpr (std::is_same_v) { +return "constant"; + } else if constexpr (std::is_same_v || + std::is_same_v) { +return "constant int"; + } else if constexpr (std::is_same_v) { +return "constant int"; + } + return "unknown"; +} + +// Helper function to get the actual type of a metadata operand +std::string getActualMDType(const MDNode *Node, unsigned Index) { + if (!Node || Index >= Node->getNumOperands()) +return "null"; + + Metadata *Op = Node->getOperand(Index); + if (!Op) +return "null"; + + if (isa(Op)) +return getTypeFormatted(); + + if (isa(Op)) { +if (auto *CAM = dyn_cast(Op)) { + Type *T = CAM->getValue()->getType(); + if (T->isIntegerTy()) +return (Twine("i") + Twine(T->getIntegerBitWidth())).str(); + if (T->isFloatingPointTy()) +return T->isFloatTy()? getTypeFormatted() + : T->isDoubleTy() ? getTypeFormatted() + : "fp"; + + return getTypeFormatted(); +} + } + if (isa(Op)) +return getTypeFormatted(); + + return "unknown"; +} + +// Helper function to simplify error reporting for invalid metadata values +template +auto reportInvalidTypeError(LLVMContext *Ctx, Twine ParamName, llvm-beanz wrote: `auto` as a return type rarely meets LLVM's coding standards: see: https://llvm.org/docs/CodingStandards.html#use-auto-type-deduction-to-make-code-more-readable https://github.com/llvm/llvm-project/pull/144577 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DirectX] Improve error handling and validation in root signature parsing (PR #144577)
https://github.com/Icohedron dismissed https://github.com/llvm/llvm-project/pull/144577 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] ELF: Introduce R_AARCH64_FUNCINIT64 relocation type. (PR #133531)
@@ -0,0 +1,19 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-readelf -s -r %t | FileCheck %s +# RUN: ld.lld %t.o -o %t -pie +# RUN: llvm-readelf -s -r %t | FileCheck %s +# RUN: not ld.lld %t.o -o %t -shared 2>&1 | FileCheck --check-prefix=ERR %s + +.data +# CHECK: R_AARCH64_IRELATIVE [[FOO:[0-9a-f]*]] +# ERR: relocation R_AARCH64_FUNCINIT64 cannot be used against preemptible symbol 'foo' +.8byte foo@FUNCINIT smithp35 wrote: Although not this patch, MaskRay is proposing that we use a different syntax for relocation specifiers to avoid ambiguity with the addend: https://maskray.me/blog/2025-03-16-relocation-generation-in-assemblers I'm proposing that aarch64 ELF follows this for data relocation specifiers (first one in https://github.com/ARM-software/abi-aa/pull/330/files#diff-c74a0dce6771ac7b499e84c140122aaa972bd9d63aed84863e675ecc9b4b2c32R659) I'm assuming that we could migrate to this syntax at a later date if needed. https://github.com/llvm/llvm-project/pull/133531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] ELF: Introduce R_AARCH64_FUNCINIT64 relocation type. (PR #133531)
https://github.com/smithp35 commented: I don't have any more significant comments and no objections to the patch. Going back to my previous comments I was most concerned when the target was an ifunc symbol and that is now not supported. https://github.com/llvm/llvm-project/pull/133531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] ELF: Introduce R_AARCH64_FUNCINIT64 relocation type. (PR #133531)
https://github.com/smithp35 edited https://github.com/llvm/llvm-project/pull/133531 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
@@ -113,7 +113,7 @@ define void @banerjee1(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: %2 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store i64 %2, ptr %B.addr.12, align 8 --> Dst: store i64 %2, ptr %B.addr.12, align 8 -; CHECK-NEXT:da analyze - output [* *]! kasuga-fj wrote: All the test changes except for FlipFlopBaseAddress.ll are related to dependencies between memory accesses of the form `*B++`. https://github.com/llvm/llvm-project/pull/148241 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SimplifyLibCalls] Add initial support for non-8-bit bytes (PR #106542)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106542 >From d2b8f2e23d50c638c31c787003c0e67feac98f18 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Wed, 28 Aug 2024 16:09:44 +0300 Subject: [PATCH] [SimplifyLibCalls] Add initial support for non-8-bit bytes The patch makes CharWidth argument of `getStringLength` mandatory and ensures the correct values are passed in most cases. This is *not* a complete support for unusual byte widths in SimplifyLibCalls since `getConstantStringInfo` returns false for those. The code guarded by `getConstantStringInfo` returning true is unchanged because the changes are currently not testable. --- llvm/include/llvm/Analysis/ValueTracking.h| 4 +- .../llvm/Transforms/Utils/SimplifyLibCalls.h | 4 +- llvm/lib/Analysis/MemoryBuiltins.cpp | 3 +- llvm/lib/Analysis/ValueTracking.cpp | 40 ++-- .../InstCombine/InstCombineCalls.cpp | 12 +- .../InstCombine/InstructionCombining.cpp | 5 +- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 191 -- .../InstCombine/SimplifyLibCalls/fputs-b16.ll | 19 ++ .../SimplifyLibCalls/fwrite-b16.ll| 19 ++ .../SimplifyLibCalls/memchr-b16.ll| 34 .../SimplifyLibCalls/memcmp-b32.ll| 32 +++ .../SimplifyLibCalls/memcpy-b16.ll| 69 +++ .../SimplifyLibCalls/memcpy_chk-b16.ll| 17 ++ .../SimplifyLibCalls/mempcpy-b16.ll | 17 ++ .../SimplifyLibCalls/memrchr-b16.ll | 20 ++ .../SimplifyLibCalls/memset-b16.ll| 66 ++ .../SimplifyLibCalls/stpcpy-b16.ll| 31 +++ .../SimplifyLibCalls/stpcpy_chk-b16.ll| 44 .../SimplifyLibCalls/stpncpy-b16.ll | 47 + .../SimplifyLibCalls/strcat-b16.ll| 20 ++ .../SimplifyLibCalls/strchr-b16.ll| 45 + .../SimplifyLibCalls/strcmp-b32.ll| 50 + .../SimplifyLibCalls/strcpy-b16.ll| 18 ++ .../SimplifyLibCalls/strcpy_chk-b16.ll| 30 +++ .../SimplifyLibCalls/strlcpy-b16.ll | 18 ++ .../SimplifyLibCalls/strlen-b16.ll| 16 ++ .../SimplifyLibCalls/strncat-b16.ll | 20 ++ .../SimplifyLibCalls/strncmp-b32.ll | 34 .../SimplifyLibCalls/strncpy-b16.ll | 43 
.../SimplifyLibCalls/strndup-b16.ll | 17 ++ .../SimplifyLibCalls/strnlen-b16.ll | 18 ++ .../SimplifyLibCalls/wcslen-b16.ll| 19 ++ llvm/test/Transforms/InstCombine/bcmp-1.ll| 2 +- llvm/test/Transforms/InstCombine/memcmp-1.ll | 2 +- llvm/test/Transforms/InstCombine/strncmp-1.ll | 2 +- 35 files changed, 928 insertions(+), 100 deletions(-) create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/fputs-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/fwrite-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memchr-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/mempcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memrchr-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/memset-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/stpncpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcat-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strchr-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strcpy_chk-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlcpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strlen-b16.ll create mode 100644 
llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncat-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncmp-b32.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strncpy-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strndup-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/strnlen-b16.ll create mode 100644 llvm/test/Transforms/InstCombine/SimplifyLibCalls/wcslen-b16.ll diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index
[llvm-branch-commits] [llvm] [mlir] [IR] Make @llvm.memset prototype byte width dependent (PR #106537)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106537 >From 83efc1aa133fb2e52e81ae4319c978859da61911 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 1 Aug 2024 23:47:25 +0300 Subject: [PATCH] [IR] Make @llvm.memset prototype byte width dependent This patch changes the type of the value argument of @llvm.memset and similar intrinsics from i8 to iN, where N is the byte width specified in data layout string. Note that the argument still has fixed type (not overloaded), but type checker will complain if the type does not match the byte width. Ideally, the type of the argument would be dependent on the address space of the pointer argument. It is easy to do this (and I did it downstream as a PoC), but since data layout string doesn't currently allow different byte widths for different address spaces, I refrained from doing it now. --- llvm/include/llvm-c/Core.h| 2 +- llvm/include/llvm/IR/Intrinsics.h | 14 +++-- llvm/include/llvm/IR/Intrinsics.td| 13 ++-- llvm/lib/AsmParser/LLParser.cpp | 4 +- llvm/lib/IR/AutoUpgrade.cpp | 4 +- llvm/lib/IR/Core.cpp | 4 +- llvm/lib/IR/Function.cpp | 4 +- llvm/lib/IR/IRBuilder.cpp | 2 +- llvm/lib/IR/Intrinsics.cpp| 61 +++ llvm/lib/IR/Verifier.cpp | 2 +- .../NumericalStabilitySanitizer.cpp | 2 +- .../LLVMIR/LLVMToLLVMIRTranslation.cpp| 4 +- 12 files changed, 69 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index d645646289025..4ecc657837910 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -2930,7 +2930,7 @@ LLVM_C_ABI LLVMValueRef LLVMGetIntrinsicDeclaration(LLVMModuleRef Mod, * * @see llvm::Intrinsic::getType() */ -LLVM_C_ABI LLVMTypeRef LLVMIntrinsicGetType(LLVMContextRef Ctx, unsigned ID, +LLVM_C_ABI LLVMTypeRef LLVMIntrinsicGetType(LLVMModuleRef Mod, unsigned ID, LLVMTypeRef *ParamTypes, size_t ParamCount); diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index 
156805293367b..896c952c95c14 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -23,6 +23,7 @@ namespace llvm { +class DataLayout; class Type; class FunctionType; class Function; @@ -75,8 +76,7 @@ namespace Intrinsic { LLVM_ABI std::string getNameNoUnnamedTypes(ID Id, ArrayRef Tys); /// Return the function type for an intrinsic. - LLVM_ABI FunctionType *getType(LLVMContext &Context, ID id, - ArrayRef Tys = {}); + LLVM_ABI FunctionType *getType(Module *M, ID id, ArrayRef Tys = {}); /// Returns true if the intrinsic can be overloaded. LLVM_ABI bool isOverloaded(ID id); @@ -141,6 +141,7 @@ namespace Intrinsic { struct IITDescriptor { enum IITDescriptorKind { Void, + Byte, VarArg, MMX, Token, @@ -253,9 +254,9 @@ namespace Intrinsic { /// /// Returns false if the given type matches with the constraints, true /// otherwise. - LLVM_ABI MatchIntrinsicTypesResult - matchIntrinsicSignature(FunctionType *FTy, ArrayRef &Infos, - SmallVectorImpl &ArgTys); + LLVM_ABI MatchIntrinsicTypesResult matchIntrinsicSignature( + const DataLayout &DL, FunctionType *FTy, ArrayRef &Infos, + SmallVectorImpl &ArgTys); /// Verify if the intrinsic has variable arguments. This method is intended to /// be called after all the fixed arguments have been matched first. @@ -270,7 +271,8 @@ namespace Intrinsic { /// /// Returns false if the given ID and function type combination is not a /// valid intrinsic call. - LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, + LLVM_ABI bool getIntrinsicSignature(const DataLayout &DL, Intrinsic::ID, + FunctionType *FT, SmallVectorImpl &ArgTys); /// Same as previous, but accepts a Function instead of ID and FunctionType. 
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index bd6f94ac1286c..438910fd2aef5 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -308,6 +308,7 @@ def IIT_V1 : IIT_Vec<1, 28>; def IIT_VARARG : IIT_VT; def IIT_ONE_NTH_ELTS_VEC_ARG : IIT_Base<30>; def IIT_SAME_VEC_WIDTH_ARG : IIT_Base<31>; +def IIT_BYTE : IIT_Base<32>; def IIT_VEC_OF_ANYPTRS_TO_ELT : IIT_Base<34>; def IIT_I128 : IIT_Int<128, 35>; def IIT_V512 : IIT_Vec<512, 36>; @@ -392,6 +393,10 @@ class LLVMType { !foreach(iit, IITs, iit.Number)); } +class LLVMByteType : LLVMType { + let Sig = [IIT_BYTE.Number]; +} + class LLVMAnyType : LLVMType { let
[llvm-branch-commits] [clang] [llvm] [ValueTracking] Add CharWidth argument to getConstantStringInfo (NFC) (PR #106541)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106541 >From 0a21746432336f2460bb916ede9cdb1a1ea61dd6 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Wed, 28 Aug 2024 23:51:13 +0300 Subject: [PATCH] [ValueTracking] Add CharWidth argument to getConstantStringInfo (NFC) The method assumes that host chars and target chars have the same width. Add a CharWidth argument so that it can bail out if the requested char width differs from the host char width. Alternatively, the check could be done at call sites, but this is more error-prone. In the future, this method will be replaced with a different one that allows host/target chars to have different widths. The prototype will be the same except that StringRef is replaced with something that is byte width agnostic. Adding CharWidth argument now reduces the future diff. --- clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 4 +- llvm/include/llvm/Analysis/ValueTracking.h| 2 +- llvm/lib/Analysis/ValueTracking.cpp | 7 +- .../AMDGPU/AMDGPUPrintfRuntimeBinding.cpp | 4 +- llvm/lib/Target/SPIRV/SPIRVAsmPrinter.cpp | 2 +- .../Target/SPIRV/SPIRVPrepareFunctions.cpp| 2 +- .../WebAssembly/WebAssemblyAsmPrinter.cpp | 2 +- .../AggressiveInstCombine.cpp | 12 +- .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 4 +- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 103 -- 10 files changed, 96 insertions(+), 46 deletions(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f09b3b92c4ea0..b7b65634238c4 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -226,7 +226,7 @@ void CodeGenFunction::ProcessOrderScopeAMDGCN(Value *Order, Value *Scope, // Some of the atomic builtins take the scope as a string name. 
StringRef scp; - if (llvm::getConstantStringInfo(Scope, scp)) { + if (llvm::getConstantStringInfo(Scope, scp, /*CharWidth=*/8)) { SSID = getLLVMContext().getOrInsertSyncScopeID(scp); return; } @@ -281,7 +281,7 @@ void CodeGenFunction::AddAMDGPUFenceAddressSpaceMMRA(llvm::Instruction *Inst, for (unsigned K = 2; K < E->getNumArgs(); ++K) { llvm::Value *V = EmitScalarExpr(E->getArg(K)); StringRef AS; -if (llvm::getConstantStringInfo(V, AS)) { +if (llvm::getConstantStringInfo(V, AS, /*CharWidth=*/8)) { MMRAs.push_back({Tag, AS}); // TODO: Delete the resulting unused constant? continue; diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 02990a3cb44f7..e6fb3cb9bd044 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -404,7 +404,7 @@ LLVM_ABI bool getConstantDataArrayInfo(const Value *V, /// trailing null characters as well as any other characters that come after /// it. LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, -bool TrimAtNul = true); +unsigned CharWidth, bool TrimAtNul = true); /// If we can compute the length of the string pointed to by the specified /// pointer, return 'len+1'. If we can't, return 0. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 858d79b7f095b..ed202b8a1f6e5 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6457,9 +6457,12 @@ bool llvm::getConstantDataArrayInfo(const Value *V, /// return true. When TrimAtNul is set, Str will contain only the bytes up /// to but not including the first nul. Return false on failure. 
bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, - bool TrimAtNul) { + unsigned CharWidth, bool TrimAtNul) { + if (CharWidth != CHAR_BIT) +return false; + ConstantDataArraySlice Slice; - if (!getConstantDataArrayInfo(V, Slice, 8)) + if (!getConstantDataArrayInfo(V, Slice, CharWidth)) return false; if (Slice.Array == nullptr) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp index 7a2a7fc250e27..471dfbc53274c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrintfRuntimeBinding.cpp @@ -121,7 +121,7 @@ static_assert(NonLiteralStr.size() == 3); static StringRef getAsConstantStr(Value *V) { StringRef S; - if (!getConstantStringInfo(V, S)) + if (!getConstantStringInfo(V, S, /*CharWidth=*/8)) S = NonLiteralStr; return S; @@ -154,7 +154,7 @@ bool AMDGPUPrintfRuntimeBindingImpl::lowerPrintfForGpu(Module &M) { Value *Op = CI->getArgOperand(0); StringRef FormatStr; -if (!getConstantStringInfo(Op, FormatStr)) { +if (!getConstantStringInfo(Op, FormatStr, /*CharWidth=*/8)) { V
[llvm-branch-commits] [llvm] [IRBuilder] Add getByteTy and use it in CreatePtrAdd (PR #106539)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106539 >From 517a87c03b68e0a9392841276fd69dd3c483eb12 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 22 Aug 2024 15:10:58 +0300 Subject: [PATCH] [IRBuilder] Add getByteTy and use it in CreatePtrAdd The change requires DataLayout instance to be available, which, in turn, requires insertion point to be set. In-tree tests detected only one case when the function was called without setting an insertion point, it was changed to create a constant expression directly. --- llvm/include/llvm/IR/IRBuilder.h | 10 +++-- .../Instrumentation/SanitizerCoverage.cpp | 5 ++--- llvm/unittests/IR/IRBuilderTest.cpp | 22 +++ 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 7c600e762a451..b0c60056e740d 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -543,6 +543,12 @@ class IRBuilderBase { // Type creation methods //======// + /// Fetch the type representing a byte. 
+ IntegerType *getByteTy() { +const DataLayout &DL = BB->getDataLayout(); +return Type::getIntNTy(Context, DL.getByteWidth()); + } + /// Fetch the type representing a single bit IntegerType *getInt1Ty() { return Type::getInt1Ty(Context); @@ -2040,12 +2046,12 @@ class IRBuilderBase { Value *CreatePtrAdd(Value *Ptr, Value *Offset, const Twine &Name = "", GEPNoWrapFlags NW = GEPNoWrapFlags::none()) { -return CreateGEP(getInt8Ty(), Ptr, Offset, Name, NW); +return CreateGEP(getByteTy(), Ptr, Offset, Name, NW); } Value *CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name = "") { -return CreateGEP(getInt8Ty(), Ptr, Offset, Name, +return CreateGEP(getByteTy(), Ptr, Offset, Name, GEPNoWrapFlags::inBounds()); } diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp index 5b8ea1547ca2f..a45b56696f612 100644 --- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp +++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp @@ -357,14 +357,13 @@ ModuleSanitizerCoverage::CreateSecStartEnd(Module &M, const char *Section, GlobalVariable *SecEnd = new GlobalVariable(M, Ty, false, Linkage, nullptr, getSectionEnd(Section)); SecEnd->setVisibility(GlobalValue::HiddenVisibility); - IRBuilder<> IRB(M.getContext()); if (!TargetTriple.isOSBinFormatCOFF()) return std::make_pair(SecStart, SecEnd); // Account for the fact that on windows-msvc __start_* symbols actually // point to a uint64_t before the start of the array. 
- auto GEP = - IRB.CreatePtrAdd(SecStart, ConstantInt::get(IntptrTy, sizeof(uint64_t))); + Constant *GEP = ConstantExpr::getGetElementPtr( + Int8Ty, SecStart, ConstantInt::get(IntptrTy, sizeof(uint64_t))); return std::make_pair(GEP, SecEnd); } diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp index 4f2ede3321080..9a0be982b2175 100644 --- a/llvm/unittests/IR/IRBuilderTest.cpp +++ b/llvm/unittests/IR/IRBuilderTest.cpp @@ -525,6 +525,14 @@ TEST_F(IRBuilderTest, DataLayout) { EXPECT_FALSE(M->getDataLayout().isLegalInteger(32)); } +TEST_F(IRBuilderTest, GetByteTy) { + IRBuilder<> Builder(BB); + + EXPECT_TRUE(Builder.getByteTy()->isIntegerTy(8)); + M->setDataLayout("b:32"); + EXPECT_TRUE(Builder.getByteTy()->isIntegerTy(32)); +} + TEST_F(IRBuilderTest, GetIntTy) { IRBuilder<> Builder(BB); IntegerType *Ty1 = Builder.getInt1Ty(); @@ -536,6 +544,20 @@ TEST_F(IRBuilderTest, GetIntTy) { EXPECT_EQ(IntPtrTy, IntegerType::get(Ctx, IntPtrBitSize)); } +TEST_F(IRBuilderTest, CreatePtrAdd) { + IRBuilder<> Builder(BB); + + M->setDataLayout("b:16-p:32:32"); + Value *V = Builder.CreatePtrAdd(GV, ConstantInt::get(Ctx, APInt(32, 42))); + ASSERT_TRUE(isa(V)); + EXPECT_TRUE(cast(V)->getResultElementType()->isIntegerTy(16)); + + M->setDataLayout("b:32-p:64:32"); + V = Builder.CreateInBoundsPtrAdd(GV, ConstantInt::get(Ctx, APInt(64, 42))); + ASSERT_TRUE(isa(V)); + EXPECT_TRUE(cast(V)->getResultElementType()->isIntegerTy(32)); +} + TEST_F(IRBuilderTest, UnaryOperators) { IRBuilder Builder(BB); Value *V = Builder.CreateLoad(GV->getValueType(), GV); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [IR] Account for byte width in m_PtrAdd (PR #106540)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106540 >From 161e6baa5bd1b8349d79e4cf1f4d3ce32348502f Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Thu, 29 Aug 2024 00:54:20 +0300 Subject: [PATCH] [IR] Account for byte width in m_PtrAdd The method has few uses yet, so just pass DL argument to it. The change follows m_PtrToIntSameSize, and I don't see a better way of delivering the byte width to the method. --- llvm/include/llvm/IR/PatternMatch.h | 13 ++ llvm/lib/Analysis/InstructionSimplify.cpp | 2 +- .../InstCombineSimplifyDemanded.cpp | 7 ++--- .../InstCombine/InstructionCombining.cpp | 2 +- llvm/unittests/IR/PatternMatch.cpp| 26 ++- 5 files changed, 34 insertions(+), 16 deletions(-) diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h index ed9b83d5d4361..c5fb399070e02 100644 --- a/llvm/include/llvm/IR/PatternMatch.h +++ b/llvm/include/llvm/IR/PatternMatch.h @@ -1940,15 +1940,17 @@ struct m_SplatOrPoisonMask { }; template struct PtrAdd_match { + const DataLayout &DL; PointerOpTy PointerOp; OffsetOpTy OffsetOp; - PtrAdd_match(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp) - : PointerOp(PointerOp), OffsetOp(OffsetOp) {} + PtrAdd_match(const DataLayout &DL, const PointerOpTy &PointerOp, + const OffsetOpTy &OffsetOp) + : DL(DL), PointerOp(PointerOp), OffsetOp(OffsetOp) {} template bool match(OpTy *V) const { auto *GEP = dyn_cast(V); -return GEP && GEP->getSourceElementType()->isIntegerTy(8) && +return GEP && GEP->getSourceElementType()->isIntegerTy(DL.getByteWidth()) && PointerOp.match(GEP->getPointerOperand()) && OffsetOp.match(GEP->idx_begin()->get()); } @@ -1990,8 +1992,9 @@ inline auto m_GEP(const OperandTypes &...Ops) { /// Matches GEP with i8 source element type template inline PtrAdd_match -m_PtrAdd(const PointerOpTy &PointerOp, const OffsetOpTy &OffsetOp) { - return PtrAdd_match(PointerOp, OffsetOp); +m_PtrAdd(const DataLayout &DL, const PointerOpTy &PointerOp, + const 
OffsetOpTy &OffsetOp) { + return PtrAdd_match(DL, PointerOp, OffsetOp); } //===--===// diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 82530e7d5b6c6..f310a5db5aee1 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -5389,7 +5389,7 @@ static Value *simplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty, // ptrtoint (ptradd (Ptr, X - ptrtoint(Ptr))) -> X Value *Ptr, *X; if (CastOpc == Instruction::PtrToInt && - match(Op, m_PtrAdd(m_Value(Ptr), + match(Op, m_PtrAdd(Q.DL, m_Value(Ptr), m_Sub(m_Value(X), m_PtrToInt(m_Deferred(Ptr) && X->getType() == Ty && Ty == Q.DL.getIndexType(Ptr->getType())) return X; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 0e3436d12702d..cc5b4f3cb63bf 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -991,9 +991,10 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, Value *InnerPtr; uint64_t GEPIndex; uint64_t PtrMaskImmediate; -if (match(I, m_Intrinsic( - m_PtrAdd(m_Value(InnerPtr), m_ConstantInt(GEPIndex)), - m_ConstantInt(PtrMaskImmediate { +if (match(I, + m_Intrinsic( + m_PtrAdd(DL, m_Value(InnerPtr), m_ConstantInt(GEPIndex)), + m_ConstantInt(PtrMaskImmediate { LHSKnown = computeKnownBits(InnerPtr, I, Depth + 1); if (!LHSKnown.isZero()) { diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 91a1b61ddc483..66358359275d2 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2595,7 +2595,7 @@ static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP, auto &DL = IC.getDataLayout(); Value *Base; const APInt *C1; - if (!match(Src, 
m_PtrAdd(m_Value(Base), m_APInt(C1 + if (!match(Src, m_PtrAdd(DL, m_Value(Base), m_APInt(C1 return nullptr; Value *VarIndex; const APInt *C2; diff --git a/llvm/unittests/IR/PatternMatch.cpp b/llvm/unittests/IR/PatternMatch.cpp index bb7cc0802b1df..c88c01b9e5541 100644 --- a/llvm/unittests/IR/PatternMatch.cpp +++ b/llvm/unittests/IR/PatternMatch.cpp @@ -2599,26 +2599,40 @@ TEST_F(PatternMatchTest, ConstExpr) { EXPECT_TRUE(match(V, m_ConstantExpr())); } -TEST_F(PatternMatch
[llvm-branch-commits] [llvm] [ValueTracking] Make isBytewiseValue byte width agnostic (PR #106538)
https://github.com/s-barannikov updated https://github.com/llvm/llvm-project/pull/106538 >From 81781bc2aa7360f50688e24b361ef768d4d6f961 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Fri, 2 Aug 2024 13:14:49 +0300 Subject: [PATCH] [ValueTracking] Make isBytewiseValue byte width agnostic This is a simple change to show how easy it can be to support unusual byte widths in the middle end. --- llvm/lib/Analysis/ValueTracking.cpp | 30 +++-- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 21f844c4d2f45..858d79b7f095b 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -6068,21 +6068,22 @@ bool llvm::canIgnoreSignBitOfNaN(const Use &U) { } Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { + unsigned ByteWidth = DL.getByteWidth(); // All byte-wide stores are splatable, even of arbitrary variables. - if (V->getType()->isIntegerTy(8)) + if (V->getType()->isIntegerTy(ByteWidth)) return V; LLVMContext &Ctx = V->getContext(); // Undef don't care. - auto *UndefInt8 = UndefValue::get(Type::getInt8Ty(Ctx)); + auto *UndefByte = UndefValue::get(Type::getIntNTy(Ctx, ByteWidth)); if (isa(V)) -return UndefInt8; +return UndefByte; // Return poison for zero-sized type. if (DL.getTypeStoreSize(V->getType()).isZero()) -return PoisonValue::get(Type::getInt8Ty(Ctx)); +return PoisonValue::get(Type::getIntNTy(Ctx, ByteWidth)); Constant *C = dyn_cast(V); if (!C) { @@ -6097,7 +6098,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { // Handle 'null' ConstantArrayZero etc. if (C->isNullValue()) -return Constant::getNullValue(Type::getInt8Ty(Ctx)); +return Constant::getNullValue(Type::getIntNTy(Ctx, ByteWidth)); // Constant floating-point values can be handled as integer values if the // corresponding integer value is "byteable". An important case is 0.0. 
@@ -6114,13 +6115,14 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { : nullptr; } - // We can handle constant integers that are multiple of 8 bits. + // We can handle constant integers that are multiple of the byte width. if (ConstantInt *CI = dyn_cast(C)) { -if (CI->getBitWidth() % 8 == 0) { - assert(CI->getBitWidth() > 8 && "8 bits should be handled above!"); - if (!CI->getValue().isSplat(8)) +if (CI->getBitWidth() % ByteWidth == 0) { + assert(CI->getBitWidth() > ByteWidth && + "single byte should be handled above!"); + if (!CI->getValue().isSplat(ByteWidth)) return nullptr; - return ConstantInt::get(Ctx, CI->getValue().trunc(8)); + return ConstantInt::get(Ctx, CI->getValue().trunc(ByteWidth)); } } @@ -6140,15 +6142,15 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { return LHS; if (!LHS || !RHS) return nullptr; -if (LHS == UndefInt8) +if (LHS == UndefByte) return RHS; -if (RHS == UndefInt8) +if (RHS == UndefByte) return LHS; return nullptr; }; if (ConstantDataSequential *CA = dyn_cast(C)) { -Value *Val = UndefInt8; +Value *Val = UndefByte; for (uint64_t I = 0, E = CA->getNumElements(); I != E; ++I) if (!(Val = Merge(Val, isBytewiseValue(CA->getElementAsConstant(I), DL return nullptr; @@ -6156,7 +6158,7 @@ Value *llvm::isBytewiseValue(Value *V, const DataLayout &DL) { } if (isa(C)) { -Value *Val = UndefInt8; +Value *Val = UndefByte; for (Value *Op : C->operands()) if (!(Val = Merge(Val, isBytewiseValue(Op, DL return nullptr; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 9c48cb1 - Revert "[mlir][vector] Refactor WarpOpScfForOp to support unused or swapped f…"
Author: Charitha Saumya Date: 2025-07-11T13:19:18-07:00 New Revision: 9c48cb1fcd99423956f2dd376ddf6c44b80c343d URL: https://github.com/llvm/llvm-project/commit/9c48cb1fcd99423956f2dd376ddf6c44b80c343d DIFF: https://github.com/llvm/llvm-project/commit/9c48cb1fcd99423956f2dd376ddf6c44b80c343d.diff LOG: Revert "[mlir][vector] Refactor WarpOpScfForOp to support unused or swapped f…" This reverts commit 3092b765ba0b2d20bd716944dda86ea8e4ad12e3. Added: Modified: mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp mlir/test/Dialect/Vector/vector-warp-distribute.mlir Removed: diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp index e62031412eab6..c8566b1ff83ef 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp @@ -1704,18 +1704,19 @@ struct WarpOpScfForOp : public WarpDistributionPattern { : WarpDistributionPattern(ctx, b), distributionMapFn(std::move(fn)) {} LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { -auto warpOpYield = cast( +auto yield = cast( warpOp.getBodyRegion().getBlocks().begin()->getTerminator()); -// Only pick up `ForOp` if it is the last op in the region. -Operation *lastNode = warpOpYield->getPrevNode(); +// Only pick up forOp if it is the last op in the region. +Operation *lastNode = yield->getPrevNode(); auto forOp = dyn_cast_or_null(lastNode); if (!forOp) return failure(); -// Collect Values that come from the `WarpOp` but are outside the `ForOp`. -// Those Values need to be returned by the new warp op. +// Collect Values that come from the warp op but are outside the forOp. +// Those Value needs to be returned by the original warpOp and passed to +// the new op. 
llvm::SmallSetVector escapingValues; -SmallVector escapingValueInputTypes; -SmallVector escapingValueDistTypes; +SmallVector inputTypes; +SmallVector distTypes; mlir::visitUsedValuesDefinedAbove( forOp.getBodyRegion(), [&](OpOperand *operand) { Operation *parent = operand->get().getParentRegion()->getParentOp(); @@ -1727,153 +1728,81 @@ struct WarpOpScfForOp : public WarpDistributionPattern { AffineMap map = distributionMapFn(operand->get()); distType = getDistributedType(vecType, map, warpOp.getWarpSize()); } -escapingValueInputTypes.push_back(operand->get().getType()); -escapingValueDistTypes.push_back(distType); +inputTypes.push_back(operand->get().getType()); +distTypes.push_back(distType); } }); -if (llvm::is_contained(escapingValueDistTypes, Type{})) +if (llvm::is_contained(distTypes, Type{})) return failure(); -// `WarpOp` can yield two types of values: -// 1. Values that are not results of the `ForOp`: -//These values must also be yielded by the new `WarpOp`. Also, we need -//to record the index mapping for these values to replace them later. -// 2. Values that are results of the `ForOp`: -//In this case, we record the index mapping between the `WarpOp` result -//index and matching `ForOp` result index. -SmallVector nonForYieldedValues; -SmallVector nonForResultIndices; -llvm::SmallDenseMap forResultMapping; -for (OpOperand &yieldOperand : warpOpYield->getOpOperands()) { - // Yielded value is not a result of the forOp. - if (yieldOperand.get().getDefiningOp() != forOp.getOperation()) { -nonForYieldedValues.push_back(yieldOperand.get()); -nonForResultIndices.push_back(yieldOperand.getOperandNumber()); + +SmallVector newRetIndices; +WarpExecuteOnLane0Op newWarpOp = moveRegionToNewWarpOpAndAppendReturns( +rewriter, warpOp, escapingValues.getArrayRef(), distTypes, +newRetIndices); +yield = cast( +newWarpOp.getBodyRegion().getBlocks().begin()->getTerminator()); + +SmallVector newOperands; +SmallVector resultIdx; +// Collect all the outputs coming from the forOp. 
+for (OpOperand &yieldOperand : yield->getOpOperands()) { + if (yieldOperand.get().getDefiningOp() != forOp.getOperation()) continue; - } - OpResult forResult = cast(yieldOperand.get()); - forResultMapping[yieldOperand.getOperandNumber()] = - forResult.getResultNumber(); + auto forResult = cast(yieldOperand.get()); + newOperands.push_back( + newWarpOp.getResult(yieldOperand.getOperandNumber())); + yieldOperand.set(forOp.getInitArgs()[forResult.getResultNumber()]); + resultIdx.push_back(yieldOperand.g
[llvm-branch-commits] [clang] [HLSL][RootSignature] Allow for multiple parsing errors in `RootSignatureParser` (PR #147832)
https://github.com/joaosaffran approved this pull request. https://github.com/llvm/llvm-project/pull/147832 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL][RootSignature] Audit `RootSignatureParser` diagnostic production (PR #147800)
https://github.com/joaosaffran approved this pull request. https://github.com/llvm/llvm-project/pull/147800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL][RootSignature] Audit `RootSignatureParser` diagnostic production (PR #147800)
https://github.com/Icohedron edited https://github.com/llvm/llvm-project/pull/147800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL][RootSignature] Audit `RootSignatureParser` diagnostic production (PR #147800)
https://github.com/Icohedron edited https://github.com/llvm/llvm-project/pull/147800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL][RootSignature] Audit `RootSignatureParser` diagnostic production (PR #147800)
https://github.com/Icohedron approved this pull request. Seems fine to me. Could use a second approval to make sure https://github.com/llvm/llvm-project/pull/147800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [HLSL][RootSignature] Audit `RootSignatureParser` diagnostic production (PR #147800)
@@ -17,24 +17,89 @@ void bad_root_signature_2() {} [RootSignature(""), RootSignature("")] // expected-warning {{attribute 'RootSignature' is already applied}} void bad_root_signature_3() {} -[RootSignature("DescriptorTable(), invalid")] // expected-error {{expected end of stream to denote end of parameters, or, another valid parameter of RootSignature}} +// expected-error@+1 {{invalid parameter of RootSignature}} +[RootSignature("DescriptorTable(), invalid")] void bad_root_signature_4() {} -// expected-error@+1 {{expected ')' to denote end of parameters, or, another valid parameter of RootConstants}} -[RootSignature("RootConstants(b0, num32BitConstants = 1, invalid)")] +// expected-error@+1 {{expected ')' or ','}} Icohedron wrote: nit: Perhaps check for column number to see where a ')' or ',' is expected? https://github.com/llvm/llvm-project/pull/147800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] [libc] Modular printf option (float only) (PR #147426)
@@ -0,0 +1,41 @@ +#ifdef LIBC_COPT_PRINTF_MODULAR +#include "src/__support/arg_list.h" + +#define LIBC_PRINTF_DEFINE_MODULAR +#include "src/stdio/printf_core/float_dec_converter.h" +#include "src/stdio/printf_core/float_hex_converter.h" +#include "src/stdio/printf_core/parser.h" + +namespace LIBC_NAMESPACE_DECL { +namespace printf_core { +template class Parser; +template class Parser>; +template class Parser>; +template class Parser>; +template class Parser>; + +#define INSTANTIATE_CONVERT_FN(NAME) \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv); \ + template int NAME( \ + Writer * writer, \ + const FormatSection &to_conv) + +INSTANTIATE_CONVERT_FN(convert_float_decimal); +INSTANTIATE_CONVERT_FN(convert_float_dec_exp); +INSTANTIATE_CONVERT_FN(convert_float_dec_auto); +INSTANTIATE_CONVERT_FN(convert_float_hex_exp); + +} // namespace printf_core +} // namespace LIBC_NAMESPACE_DECL + +// Bring this file into the link if __printf_float is referenced. +extern "C" void __printf_float() {} statham-arm wrote: OK – of course you're right that those workarounds can easily be added later. As long as you've thought about this and checked it, I'm happy. (Now I've slept on it, I think I vaguely recall that one rationale for armlink's choice of behavior is that it makes partial linking change the semantics less: if you combine a bunch of `.o` files into one big `.o`, then passing the combined file to a full link step has a very similar effect to passing the individual files one by one.) https://github.com/llvm/llvm-project/pull/147426 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143880 >From c4bda032514d199feafe799693d53e118874e3d8 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 12 Jun 2025 06:13:26 -0400 Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns Pre-committing tests to show improvements in a follow-up PR. --- .../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index c00bccdbce6b7..d48bfe0bb7f21 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) { store float 1.0, ptr addrspace(1) %p1 ret void } + +; Use non-zero shift amounts in v_lshl_add_u64. +define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) { +; GFX942_PTRADD-LABEL: select_v_lshl_add_u64: +; GFX942_PTRADD: ; %bb.0: +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3] +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: select_v_lshl_add_u64: +; GFX942_LEGACY: ; %bb.0: +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1] +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] + %gep = getelementptr inbounds i64, ptr %base, i64 %voffset + ret ptr %gep +} + +; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the +; mul into a mul24. 
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { +; GFX942_PTRADD-LABEL: fold_mul24_into_mad: +; GFX942_PTRADD: ; %bb.0: +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4 +; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4 +; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4 +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: fold_mul24_into_mad: +; GFX942_LEGACY: ; %bb.0: +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2 +; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4 +; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1] +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] + %a_masked = and i64 %a, u0xf + %b_masked = and i64 %b, u0xf + %mul = mul i64 %a_masked, %b_masked + %gep = getelementptr inbounds i8, ptr %base, i64 %mul + ret ptr %gep +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns (PR #143880)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143880 >From c4bda032514d199feafe799693d53e118874e3d8 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 12 Jun 2025 06:13:26 -0400 Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in VOP3 patterns Pre-committing tests to show improvements in a follow-up PR. --- .../AMDGPU/ptradd-sdag-optimizations.ll | 45 +++ 1 file changed, 45 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index c00bccdbce6b7..d48bfe0bb7f21 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -263,3 +263,48 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) { store float 1.0, ptr addrspace(1) %p1 ret void } + +; Use non-zero shift amounts in v_lshl_add_u64. +define ptr @select_v_lshl_add_u64(ptr %base, i64 %voffset) { +; GFX942_PTRADD-LABEL: select_v_lshl_add_u64: +; GFX942_PTRADD: ; %bb.0: +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshlrev_b64 v[2:3], 3, v[2:3] +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: select_v_lshl_add_u64: +; GFX942_LEGACY: ; %bb.0: +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:v_lshl_add_u64 v[0:1], v[2:3], 3, v[0:1] +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] + %gep = getelementptr inbounds i64, ptr %base, i64 %voffset + ret ptr %gep +} + +; Fold mul and add into v_mad, even if amdgpu-codegenprepare-mul24 turned the +; mul into a mul24. 
+define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { +; GFX942_PTRADD-LABEL: fold_mul24_into_mad: +; GFX942_PTRADD: ; %bb.0: +; GFX942_PTRADD-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_and_b32_e32 v2, 0xf, v2 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v4, 0xf, v4 +; GFX942_PTRADD-NEXT:v_mul_hi_u32_u24_e32 v3, v2, v4 +; GFX942_PTRADD-NEXT:v_mul_u32_u24_e32 v2, v2, v4 +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], v[0:1], 0, v[2:3] +; GFX942_PTRADD-NEXT:s_setpc_b64 s[30:31] +; +; GFX942_LEGACY-LABEL: fold_mul24_into_mad: +; GFX942_LEGACY: ; %bb.0: +; GFX942_LEGACY-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942_LEGACY-NEXT:v_and_b32_e32 v2, 0xf, v2 +; GFX942_LEGACY-NEXT:v_and_b32_e32 v3, 0xf, v4 +; GFX942_LEGACY-NEXT:v_mad_u64_u32 v[0:1], s[0:1], v2, v3, v[0:1] +; GFX942_LEGACY-NEXT:s_setpc_b64 s[30:31] + %a_masked = and i64 %a, u0xf + %b_masked = and i64 %b, u0xf + %mul = mul i64 %a_masked, %b_masked + %gep = getelementptr inbounds i8, ptr %base, i64 %mul + ret ptr %gep +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms (PR #146074)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146074 >From 249fbdfc77b2f3dcb299ba8aefb4aa62b57a38d1 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 26 Jun 2025 06:10:35 -0400 Subject: [PATCH] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds, that targets can use to allow transformations to introduce out-of-bounds pointer arithmetic. It also moves two such transformations from the AMDGPU-specific DAG combines to the generic DAGCombiner. This is motivated by target features like AArch64's checked pointer arithmetic, CPA, which does not tolerate the introduction of out-of-bounds pointer arithmetic. --- llvm/include/llvm/CodeGen/TargetLowering.h| 7 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 125 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 59 ++--- llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 + 4 files changed, 94 insertions(+), 100 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 69ae4f80297d5..ba4f23b2d9191 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3510,6 +3510,13 @@ class LLVM_ABI TargetLoweringBase { return false; } + /// True if the target allows transformations of in-bounds pointer + /// arithmetic that cause out-of-bounds intermediate results. 
+ virtual bool canTransformPtrArithOutOfBounds(const Function &F, + EVT PtrVT) const { +return false; + } + /// Does this target support complex deinterleaving virtual bool isComplexDeinterleavingSupported() const { return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 08dab7c697b99..3626ac45a4860 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) { if (PtrVT == IntVT && isNullConstant(N0)) return N1; - if (N0.getOpcode() != ISD::PTRADD || - reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) -return SDValue(); - - SDValue X = N0.getOperand(0); - SDValue Y = N0.getOperand(1); - SDValue Z = N1; - bool N0OneUse = N0.hasOneUse(); - bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y); - bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z); - - // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if: - // * y is a constant and (ptradd x, y) has one use; or - // * y and z are both constants. - if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) { -// If both additions in the original were NUW, the new ones are as well. 
-SDNodeFlags Flags = -(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap; -SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags); -AddToWorklist(Add.getNode()); -return DAG.getMemBasePlusOffset(X, Add, DL, Flags); + if (N0.getOpcode() == ISD::PTRADD && + !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) { +SDValue X = N0.getOperand(0); +SDValue Y = N0.getOperand(1); +SDValue Z = N1; +bool N0OneUse = N0.hasOneUse(); +bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y); +bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z); + +// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if: +// * y is a constant and (ptradd x, y) has one use; or +// * y and z are both constants. +if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) { + // If both additions in the original were NUW, the new ones are as well. + SDNodeFlags Flags = + (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap; + SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags); + AddToWorklist(Add.getNode()); + return DAG.getMemBasePlusOffset(X, Add, DL, Flags); +} + } + + // The following combines can turn in-bounds pointer arithmetic out of bounds. + // That is problematic for settings like AArch64's CPA, which checks that + // intermediate results of pointer arithmetic remain in bounds. The target + // therefore needs to opt-in to enable them. + if (!TLI.canTransformPtrArithOutOfBounds( + DAG.getMachineFunction().getFunction(), PtrVT)) +return SDValue(); + + if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) { +// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with +// global address GA and constant c, such that c can be folded into GA. +SDValue GAValue = N0.getOperand(0); +if (const GlobalAddressSDNode *GA = +dyn_cast(GAValue)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!LegalOperations && TLI.isOf
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns (PR #143881)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143881 >From 531b230f3a828d5f39cf0d2393d18d961d6be42d Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 12 Jun 2025 07:44:37 -0400 Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns This patch mirrors similar patterns for ISD::ADD. The main difference is that ISD::ADD is commutative, so that a pattern definition for, e.g., (add (mul x, y), z), automatically also handles (add z, (mul x, y)). ISD::PTRADD is not commutative, so we would need to handle these cases explicitly. This patch only implements (ptradd z, (op x, y)) patterns, where the nested operation (shift or multiply) is the offset of the ptradd (i.e., the right operand), since base pointers that are the result of a shift or multiply seem less likely. For SWDEV-516125. --- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 36 +++- .../AMDGPU/ptradd-sdag-optimizations.ll | 41 ++ llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll | 42 +++ 3 files changed, 52 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 279de32a9cee8..4548beadf23ae 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -512,12 +512,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue defm: Ternary_i16_Pats_gfx9; } // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts -class ThreeOpFragSDAG : PatFrag< +class ThreeOpFragSDAG : PatFrag< (ops node:$x, node:$y, node:$z), // When the inner operation is used multiple times, selecting 3-op // instructions may still be beneficial -- if the other users can be // combined similarly. Let's be conservative for now. 
- (op2 (HasOneUseBinOp node:$x, node:$y), node:$z), + !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp node:$x, node:$y)), + (op2 (HasOneUseBinOp node:$x, node:$y), node:$z)), [{ // Only use VALU ops when the result is divergent. if (!N->isDivergent()) @@ -544,7 +545,10 @@ class ThreeOpFragSDAG : PatFrag< let PredicateCodeUsesOperands = 1; } -class ThreeOpFrag : ThreeOpFragSDAG { +// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and +// matches (op2 z, (op1, x, y)) if op1IsRight = 1. +class ThreeOpFrag : ThreeOpFragSDAG { // The divergence predicate is irrelevant in GlobalISel, as we have // proper register bank checks. We just need to verify the constant // bus restriction when all the sources are considered. @@ -834,12 +838,19 @@ def : GCNPat< (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1), (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; -let SubtargetPredicate = HasLshlAddU64Inst in +let SubtargetPredicate = HasLshlAddU64Inst in { def : GCNPat< (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) >; +def : GCNPat < + // (ptradd z, (shl x, y)) -> ((x << y) + z) + (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), + (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) +>; +} // End SubtargetPredicate = HasLshlAddU64Inst + def : VOPBinOpClampPat; def : VOPBinOpClampPat; @@ -908,19 +919,24 @@ multiclass IMAD32_Pats { // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul. // We need to separate this because otherwise OtherPredicates would be overriden. 
-class IMAD32_Mul24_Pat: GCNPat < -(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)), -(inst $src0, $src1, $src2, 0 /* clamp */) ->; +class IMAD32_Mul24_Pats_Impl : GCNPat < +!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1, +(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2))), +(inst $src0, $src1, $src2, 0 /* clamp */)>; + +multiclass IMAD32_Mul24_Pats { + def : IMAD32_Mul24_Pats_Impl; + def : IMAD32_Mul24_Pats_Impl; +} // exclude pre-GFX9 where it was slow let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in { defm : IMAD32_Pats; - def : IMAD32_Mul24_Pat; + defm : IMAD32_Mul24_Pats; } let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in { defm : IMAD32_Pats; - def : IMAD32_Mul24_Pat; + defm : IMAD32_Mul24_Pats; } def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index d48bfe0bb7f21..34bb98550de04 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -266,18 +266,11 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) { ; Use non-zero shift amounts in v_lshl_add_u64. define ptr @select_v_lshl_add_
[llvm-branch-commits] [llvm] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms (PR #146074)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146074 >From 249fbdfc77b2f3dcb299ba8aefb4aa62b57a38d1 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 26 Jun 2025 06:10:35 -0400 Subject: [PATCH] [SDAG][AMDGPU] Allow opting in to OOB-generating PTRADD transforms This PR adds a TargetLowering hook, canTransformPtrArithOutOfBounds, that targets can use to allow transformations to introduce out-of-bounds pointer arithmetic. It also moves two such transformations from the AMDGPU-specific DAG combines to the generic DAGCombiner. This is motivated by target features like AArch64's checked pointer arithmetic, CPA, which does not tolerate the introduction of out-of-bounds pointer arithmetic. --- llvm/include/llvm/CodeGen/TargetLowering.h| 7 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 125 +++--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 59 ++--- llvm/lib/Target/AMDGPU/SIISelLowering.h | 3 + 4 files changed, 94 insertions(+), 100 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 69ae4f80297d5..ba4f23b2d9191 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -3510,6 +3510,13 @@ class LLVM_ABI TargetLoweringBase { return false; } + /// True if the target allows transformations of in-bounds pointer + /// arithmetic that cause out-of-bounds intermediate results. 
+ virtual bool canTransformPtrArithOutOfBounds(const Function &F, + EVT PtrVT) const { +return false; + } + /// Does this target support complex deinterleaving virtual bool isComplexDeinterleavingSupported() const { return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 08dab7c697b99..3626ac45a4860 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2689,59 +2689,82 @@ SDValue DAGCombiner::visitPTRADD(SDNode *N) { if (PtrVT == IntVT && isNullConstant(N0)) return N1; - if (N0.getOpcode() != ISD::PTRADD || - reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) -return SDValue(); - - SDValue X = N0.getOperand(0); - SDValue Y = N0.getOperand(1); - SDValue Z = N1; - bool N0OneUse = N0.hasOneUse(); - bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y); - bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z); - - // (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if: - // * y is a constant and (ptradd x, y) has one use; or - // * y and z are both constants. - if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) { -// If both additions in the original were NUW, the new ones are as well. 
-SDNodeFlags Flags = -(N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap; -SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags); -AddToWorklist(Add.getNode()); -return DAG.getMemBasePlusOffset(X, Add, DL, Flags); + if (N0.getOpcode() == ISD::PTRADD && + !reassociationCanBreakAddressingModePattern(ISD::PTRADD, DL, N, N0, N1)) { +SDValue X = N0.getOperand(0); +SDValue Y = N0.getOperand(1); +SDValue Z = N1; +bool N0OneUse = N0.hasOneUse(); +bool YIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Y); +bool ZIsConstant = DAG.isConstantIntBuildVectorOrConstantInt(Z); + +// (ptradd (ptradd x, y), z) -> (ptradd x, (add y, z)) if: +// * y is a constant and (ptradd x, y) has one use; or +// * y and z are both constants. +if ((YIsConstant && N0OneUse) || (YIsConstant && ZIsConstant)) { + // If both additions in the original were NUW, the new ones are as well. + SDNodeFlags Flags = + (N->getFlags() & N0->getFlags()) & SDNodeFlags::NoUnsignedWrap; + SDValue Add = DAG.getNode(ISD::ADD, DL, IntVT, {Y, Z}, Flags); + AddToWorklist(Add.getNode()); + return DAG.getMemBasePlusOffset(X, Add, DL, Flags); +} + } + + // The following combines can turn in-bounds pointer arithmetic out of bounds. + // That is problematic for settings like AArch64's CPA, which checks that + // intermediate results of pointer arithmetic remain in bounds. The target + // therefore needs to opt-in to enable them. + if (!TLI.canTransformPtrArithOutOfBounds( + DAG.getMachineFunction().getFunction(), PtrVT)) +return SDValue(); + + if (N0.getOpcode() == ISD::PTRADD && N1.getOpcode() == ISD::Constant) { +// Fold (ptradd (ptradd GA, v), c) -> (ptradd (ptradd GA, c) v) with +// global address GA and constant c, such that c can be folded into GA. +SDValue GAValue = N0.getOperand(0); +if (const GlobalAddressSDNode *GA = +dyn_cast(GAValue)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!LegalOperations && TLI.isOf
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases (PR #145330)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145330 >From b8fe4ab7861d858e98afe35a762ce53dc8d89a86 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 04:03:53 -0400 Subject: [PATCH 1/2] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp that check for ISD::ADD in a pointer context, but as far as I can tell those are only relevant for 32-bit pointer arithmetic (like frame indices/scratch addresses and LDS), for which we don't enable PTRADD generation yet. For SWDEV-516125. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 21 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 67 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 196 ++ 6 files changed, 105 insertions(+), 194 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45edcf9992706..efe4639535536 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); - else if (Src.getOpcode() == ISD::ADD && + else if (Src->isAnyAdd() && Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast(Src.getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 977579e851e33..81286c66e8ffc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -617,8 +617,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // operands 
on the new node are also disjoint. SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint : SDNodeFlags::None); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::PTRADD) { +// It isn't a ptradd anymore if it doesn't operate on the entire +// pointer. +Opcode = ISD::ADD; + } SDValue X = DAG.getNode( - Op.getOpcode(), dl, SmallVT, + Opcode, dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags); assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); @@ -2853,6 +2859,11 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, And1); } [[fallthrough]]; + case ISD::PTRADD: +if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) + break; +// PTRADD behaves like ADD if pointers are represented as integers. +[[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2962,10 +2973,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::MUL) { Known = KnownBits::mul(KnownOp0, KnownOp1); -} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB. +} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB. 
Known = KnownBits::computeForAddSub( - Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), - Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1); + Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(), + KnownOp0, KnownOp1); } break; } @@ -5608,7 +5619,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, return true; } - if (N->getOpcode() == ISD::ADD) { + if (N->isAnyAdd()) { SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6e990cb2e160c..ee73ad5dda945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, C1 = nullptr; } - if (N0.getOpcode() == ISD::ADD) { + if (N0->isAnyAdd()) { // (add N2, N3) -> addr64, or // (add (add N2, N3), C1) -> addr64 SDValue N2 = N0.getOperand(0); @@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } // Match the variable offset. - if (Addr.getOpcode() == ISD::ADD) { + if (Addr->isAnyAdd()) { LHS = Addr.getOperand(0); RHS = Addr.getOperand(1);
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases (PR #145330)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145330 >From b8fe4ab7861d858e98afe35a762ce53dc8d89a86 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 04:03:53 -0400 Subject: [PATCH 1/2] [AMDGPU][SDAG] Handle ISD::PTRADD in various special cases There are more places in SIISelLowering.cpp and AMDGPUISelDAGToDAG.cpp that check for ISD::ADD in a pointer context, but as far as I can tell those are only relevant for 32-bit pointer arithmetic (like frame indices/scratch addresses and LDS), for which we don't enable PTRADD generation yet. For SWDEV-516125. --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 2 +- .../CodeGen/SelectionDAG/TargetLowering.cpp | 21 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 67 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 196 ++ 6 files changed, 105 insertions(+), 194 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 45edcf9992706..efe4639535536 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -8219,7 +8219,7 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) { GlobalAddressSDNode *G = nullptr; if (Src.getOpcode() == ISD::GlobalAddress) G = cast(Src); - else if (Src.getOpcode() == ISD::ADD && + else if (Src->isAnyAdd() && Src.getOperand(0).getOpcode() == ISD::GlobalAddress && Src.getOperand(1).getOpcode() == ISD::Constant) { G = cast(Src.getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 977579e851e33..81286c66e8ffc 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -617,8 +617,14 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // operands 
on the new node are also disjoint. SDNodeFlags Flags(Op->getFlags().hasDisjoint() ? SDNodeFlags::Disjoint : SDNodeFlags::None); + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::PTRADD) { +// It isn't a ptradd anymore if it doesn't operate on the entire +// pointer. +Opcode = ISD::ADD; + } SDValue X = DAG.getNode( - Op.getOpcode(), dl, SmallVT, + Opcode, dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)), Flags); assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); @@ -2853,6 +2859,11 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, And1); } [[fallthrough]]; + case ISD::PTRADD: +if (Op.getOperand(0).getValueType() != Op.getOperand(1).getValueType()) + break; +// PTRADD behaves like ADD if pointers are represented as integers. +[[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2962,10 +2973,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::MUL) { Known = KnownBits::mul(KnownOp0, KnownOp1); -} else { // Op.getOpcode() is either ISD::ADD or ISD::SUB. +} else { // Op.getOpcode() is either ISD::ADD, ISD::PTRADD, or ISD::SUB. 
Known = KnownBits::computeForAddSub( - Op.getOpcode() == ISD::ADD, Flags.hasNoSignedWrap(), - Flags.hasNoUnsignedWrap(), KnownOp0, KnownOp1); + Op->isAnyAdd(), Flags.hasNoSignedWrap(), Flags.hasNoUnsignedWrap(), + KnownOp0, KnownOp1); } break; } @@ -5608,7 +5619,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA, return true; } - if (N->getOpcode() == ISD::ADD) { + if (N->isAnyAdd()) { SDValue N1 = N->getOperand(0); SDValue N2 = N->getOperand(1); if (isGAPlusOffset(N1.getNode(), GA, Offset)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 6e990cb2e160c..ee73ad5dda945 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1449,7 +1449,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr, C1 = nullptr; } - if (N0.getOpcode() == ISD::ADD) { + if (N0->isAnyAdd()) { // (add N2, N3) -> addr64, or // (add (add N2, N3), C1) -> addr64 SDValue N2 = N0.getOperand(0); @@ -1899,7 +1899,7 @@ bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N, } // Match the variable offset. - if (Addr.getOpcode() == ISD::ADD) { + if (Addr->isAnyAdd()) { LHS = Addr.getOperand(0); RHS = Addr.getOperand(1);
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default (PR #146076)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146076 >From f5f615abd4cb1d130876b720891b0ac2b58ace9c Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 27 Jun 2025 05:38:52 -0400 Subject: [PATCH] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default Also removes the command line option to control this feature. There seem to be mainly two kinds of test changes: - Some operands of addition instructions are swapped; that is to be expected since PTRADD is not commutative. - Improvements in code generation, probably because the legacy lowering enabled some transformations that were sometimes harmful. For SWDEV-516125. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +- .../identical-subrange-spill-infloop.ll | 354 +++--- .../AMDGPU/infer-addrspace-flat-atomic.ll | 14 +- llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 8 +- .../AMDGPU/lower-module-lds-via-hybrid.ll | 4 +- .../AMDGPU/lower-module-lds-via-table.ll | 16 +- .../match-perm-extract-vector-elt-bug.ll | 22 +- llvm/test/CodeGen/AMDGPU/memmove-var-size.ll | 16 +- .../AMDGPU/preload-implicit-kernargs.ll | 6 +- .../AMDGPU/promote-constOffset-to-imm.ll | 8 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 7 +- .../AMDGPU/ptradd-sdag-optimizations.ll | 94 ++--- .../AMDGPU/ptradd-sdag-undef-poison.ll| 6 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll | 27 +- llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll | 29 +- 15 files changed, 311 insertions(+), 310 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2f05c240e8032..361088e29213b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -63,14 +63,6 @@ static cl::opt UseDivergentRegisterIndexing( cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false)); -// TODO: This option should be removed once we switch to always using PTRADD in -// the SelectionDAG. 
-static cl::opt UseSelectionDAGPTRADD( -"amdgpu-use-sdag-ptradd", cl::Hidden, -cl::desc("Generate ISD::PTRADD nodes for 64-bit pointer arithmetic in the " - "SelectionDAG ISel"), -cl::init(false)); - static bool denormalModeIsFlushAllF32(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign(); @@ -10599,7 +10591,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, bool SITargetLowering::shouldPreservePtrArith(const Function &F, EVT PtrVT) const { - return UseSelectionDAGPTRADD && PtrVT == MVT::i64; + return PtrVT == MVT::i64; } bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F, diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 56ceba258f471..f9fcf489bd389 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -6,97 +6,151 @@ define void @main(i1 %arg) #0 { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT:s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT:buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT:buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT:buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT:buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT:s_mov_b64 exec, s[4:5] -; CHECK-NEXT:v_writelane_b32 v5, s30, 0 -; CHECK-NEXT:v_writelane_b32 v5, s31, 1 -; CHECK-NEXT:v_writelane_b32 v5, s36, 2 -; CHECK-NEXT:v_writelane_b32 v5, s37, 3 -; CHECK-NEXT:v_writelane_b32 v5, s38, 4 -; CHECK-NEXT:v_writelane_b32 v5, s39, 5 -; CHECK-NEXT:v_writelane_b32 v5, s48, 6 -; CHECK-NEXT:v_writelane_b32 v5, s49, 7 -; CHECK-NEXT:v_writelane_b32 v5, s50, 8 -; CHECK-NEXT:v_writelane_b32 v5, s51, 9 -; CHECK-NEXT:v_writelane_b32 v5, 
s52, 10 -; CHECK-NEXT:v_writelane_b32 v5, s53, 11 -; CHECK-NEXT:v_writelane_b32 v5, s54, 12 -; CHECK-NEXT:v_writelane_b32 v5, s55, 13 -; CHECK-NEXT:s_getpc_b64 s[24:25] -; CHECK-NEXT:v_writelane_b32 v5, s64, 14 -; CHECK-NEXT:s_movk_i32 s4, 0xf0 -; CHECK-NEXT:s_mov_b32 s5, s24 -; CHECK-NEXT:v_writelane_b32 v5, s65, 15 -; CHECK-NEXT:s_load_dwordx16 s[8:23], s[4:5], 0x0 -; CHECK-NEXT:s_mov_b64 s[4:5], 0 -; CHECK-NEXT:v_writelane_b32 v5, s66, 16 -; CHECK-NEXT:s_load_dwordx4 s[4:7], s[4:5], 0x0 -; CHECK-NEXT:v_writelane_b32 v5, s67, 17 -; CHECK-NEXT:s_waitcnt lgkmcnt(0) -; CHECK-NEXT:s_movk_i32 s6, 0x130 -; CHECK-NEXT:s_mov_b32 s7, s24 -; CHECK-NEXT:v_writela
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] DAGCombine PTRADD -> disjoint OR (PR #146075)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146075 >From 2bc75df51ee121e06467acf47f74b87ae22fd4f7 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 27 Jun 2025 04:23:50 -0400 Subject: [PATCH 1/3] [AMDGPU][SDAG] DAGCombine PTRADD -> disjoint OR If we can't fold a PTRADD's offset into its users, lowering them to disjoint ORs is preferable: Often, a 32-bit OR instruction suffices where we'd otherwise use a pair of 32-bit additions with carry. This needs to be a DAGCombine (and not a selection rule) because its main purpose is to enable subsequent DAGCombines for bitwise operations. We don't want to just turn PTRADDs into disjoint ORs whenever that's sound because this transform loses the information that the operation implements pointer arithmetic, which we will soon need to fold offsets into FLAT instructions. Currently, disjoint ORs can still be used for offset folding, so that part of the logic can't be tested. The PR contains a hacky workaround for a situation where an AssertAlign operand of a PTRADD is not DAGCombined before the PTRADD, causing the PTRADD to be turned into a disjoint OR although reassociating it with the operand of the AssertAlign would be better. This wouldn't be a problem if the DAGCombiner ensured that a node is only processed after all its operands have been processed. For SWDEV-516125. 
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 35 .../AMDGPU/ptradd-sdag-optimizations.ll | 56 ++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2f05c240e8032..00e093c42dd4b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -15144,6 +15144,41 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N, return Folded; } + // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if + // that transformation can't block an offset folding at any use of the ptradd. + // This should be done late, after legalization, so that it doesn't block + // other ptradd combines that could enable more offset folding. + bool HasIntermediateAssertAlign = + N0->getOpcode() == ISD::AssertAlign && N0->getOperand(0)->isAnyAdd(); + // This is a hack to work around an ordering problem for DAGs like this: + // (ptradd (AssertAlign (ptradd p, c1), k), c2) + // If the outer ptradd is handled first by the DAGCombiner, it can be + // transformed into a disjoint or. Then, when the generic AssertAlign combine + // pushes the AssertAlign through the inner ptradd, it's too late for the + // ptradd reassociation to trigger. + if (!DCI.isBeforeLegalizeOps() && !HasIntermediateAssertAlign && + DAG.haveNoCommonBitsSet(N0, N1)) { +bool TransformCanBreakAddrMode = any_of(N->users(), [&](SDNode *User) { + if (auto *LoadStore = dyn_cast(User); + LoadStore && LoadStore->getBasePtr().getNode() == N) { +unsigned AS = LoadStore->getAddressSpace(); +// Currently, we only really need ptradds to fold offsets into flat +// memory instructions. 
+if (AS != AMDGPUAS::FLAT_ADDRESS) + return false; +TargetLoweringBase::AddrMode AM; +AM.HasBaseReg = true; +EVT VT = LoadStore->getMemoryVT(); +Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); +return isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS); + } + return false; +}); + +if (!TransformCanBreakAddrMode) + return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint); + } + if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse()) return SDValue(); diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 893deb35fe822..64e041103a563 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -100,7 +100,7 @@ define void @baseptr_null(i64 %offset, i8 %v) { ; Taken from implicit-kernarg-backend-usage.ll, tests the PTRADD handling in the ; assertalign DAG combine. -define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) #0 { +define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX942-LABEL: llvm_amdgcn_queue_ptr: ; GFX942: ; %bb.0: ; GFX942-NEXT:v_mov_b32_e32 v2, 0 @@ -416,6 +416,60 @@ entry: ret void } +; Check that ptradds can be lowered to disjoint ORs. +define ptr @gep_disjoint_or(ptr %base) { +; GFX942-LABEL: gep_disjoint_or: +; GFX942: ; %bb.0: +; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT:v_and_or_b32 v0, v0, -16, 4 +; GFX942-NEXT:s_setpc_b64 s[30:31] + %p = call ptr @llvm.ptrmask(ptr %base, i64 s0xf0) + %gep = getelementptr nuw inbounds i8, ptr %p, i64 4 + ret ptr %gep +} + +; Check that AssertAlign no
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] DAGCombine PTRADD -> disjoint OR (PR #146075)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146075 >From 2bc75df51ee121e06467acf47f74b87ae22fd4f7 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 27 Jun 2025 04:23:50 -0400 Subject: [PATCH 1/3] [AMDGPU][SDAG] DAGCombine PTRADD -> disjoint OR If we can't fold a PTRADD's offset into its users, lowering them to disjoint ORs is preferable: Often, a 32-bit OR instruction suffices where we'd otherwise use a pair of 32-bit additions with carry. This needs to be a DAGCombine (and not a selection rule) because its main purpose is to enable subsequent DAGCombines for bitwise operations. We don't want to just turn PTRADDs into disjoint ORs whenever that's sound because this transform loses the information that the operation implements pointer arithmetic, which we will soon need to fold offsets into FLAT instructions. Currently, disjoint ORs can still be used for offset folding, so that part of the logic can't be tested. The PR contains a hacky workaround for a situation where an AssertAlign operand of a PTRADD is not DAGCombined before the PTRADD, causing the PTRADD to be turned into a disjoint OR although reassociating it with the operand of the AssertAlign would be better. This wouldn't be a problem if the DAGCombiner ensured that a node is only processed after all its operands have been processed. For SWDEV-516125. 
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 35 .../AMDGPU/ptradd-sdag-optimizations.ll | 56 ++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2f05c240e8032..00e093c42dd4b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -15144,6 +15144,41 @@ SDValue SITargetLowering::performPtrAddCombine(SDNode *N, return Folded; } + // Transform (ptradd a, b) -> (or disjoint a, b) if it is equivalent and if + // that transformation can't block an offset folding at any use of the ptradd. + // This should be done late, after legalization, so that it doesn't block + // other ptradd combines that could enable more offset folding. + bool HasIntermediateAssertAlign = + N0->getOpcode() == ISD::AssertAlign && N0->getOperand(0)->isAnyAdd(); + // This is a hack to work around an ordering problem for DAGs like this: + // (ptradd (AssertAlign (ptradd p, c1), k), c2) + // If the outer ptradd is handled first by the DAGCombiner, it can be + // transformed into a disjoint or. Then, when the generic AssertAlign combine + // pushes the AssertAlign through the inner ptradd, it's too late for the + // ptradd reassociation to trigger. + if (!DCI.isBeforeLegalizeOps() && !HasIntermediateAssertAlign && + DAG.haveNoCommonBitsSet(N0, N1)) { +bool TransformCanBreakAddrMode = any_of(N->users(), [&](SDNode *User) { + if (auto *LoadStore = dyn_cast(User); + LoadStore && LoadStore->getBasePtr().getNode() == N) { +unsigned AS = LoadStore->getAddressSpace(); +// Currently, we only really need ptradds to fold offsets into flat +// memory instructions. 
+if (AS != AMDGPUAS::FLAT_ADDRESS) + return false; +TargetLoweringBase::AddrMode AM; +AM.HasBaseReg = true; +EVT VT = LoadStore->getMemoryVT(); +Type *AccessTy = VT.getTypeForEVT(*DAG.getContext()); +return isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS); + } + return false; +}); + +if (!TransformCanBreakAddrMode) + return DAG.getNode(ISD::OR, DL, VT, N0, N1, SDNodeFlags::Disjoint); + } + if (N1.getOpcode() != ISD::ADD || !N1.hasOneUse()) return SDValue(); diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 893deb35fe822..64e041103a563 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -100,7 +100,7 @@ define void @baseptr_null(i64 %offset, i8 %v) { ; Taken from implicit-kernarg-backend-usage.ll, tests the PTRADD handling in the ; assertalign DAG combine. -define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) #0 { +define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) { ; GFX942-LABEL: llvm_amdgcn_queue_ptr: ; GFX942: ; %bb.0: ; GFX942-NEXT:v_mov_b32_e32 v2, 0 @@ -416,6 +416,60 @@ entry: ret void } +; Check that ptradds can be lowered to disjoint ORs. +define ptr @gep_disjoint_or(ptr %base) { +; GFX942-LABEL: gep_disjoint_or: +; GFX942: ; %bb.0: +; GFX942-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX942-NEXT:v_and_or_b32 v0, v0, -16, 4 +; GFX942-NEXT:s_setpc_b64 s[30:31] + %p = call ptr @llvm.ptrmask(ptr %base, i64 s0xf0) + %gep = getelementptr nuw inbounds i8, ptr %p, i64 4 + ret ptr %gep +} + +; Check that AssertAlign no
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default (PR #146076)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/146076 >From f5f615abd4cb1d130876b720891b0ac2b58ace9c Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Fri, 27 Jun 2025 05:38:52 -0400 Subject: [PATCH] [AMDGPU][SDAG] Enable ISD::PTRADD for 64-bit AS by default Also removes the command line option to control this feature. There seem to be mainly two kinds of test changes: - Some operands of addition instructions are swapped; that is to be expected since PTRADD is not commutative. - Improvements in code generation, probably because the legacy lowering enabled some transformations that were sometimes harmful. For SWDEV-516125. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 +- .../identical-subrange-spill-infloop.ll | 354 +++--- .../AMDGPU/infer-addrspace-flat-atomic.ll | 14 +- llvm/test/CodeGen/AMDGPU/lds-frame-extern.ll | 8 +- .../AMDGPU/lower-module-lds-via-hybrid.ll | 4 +- .../AMDGPU/lower-module-lds-via-table.ll | 16 +- .../match-perm-extract-vector-elt-bug.ll | 22 +- llvm/test/CodeGen/AMDGPU/memmove-var-size.ll | 16 +- .../AMDGPU/preload-implicit-kernargs.ll | 6 +- .../AMDGPU/promote-constOffset-to-imm.ll | 8 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 7 +- .../AMDGPU/ptradd-sdag-optimizations.ll | 94 ++--- .../AMDGPU/ptradd-sdag-undef-poison.ll| 6 +- llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll | 27 +- llvm/test/CodeGen/AMDGPU/store-weird-sizes.ll | 29 +- 15 files changed, 311 insertions(+), 310 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 2f05c240e8032..361088e29213b 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -63,14 +63,6 @@ static cl::opt UseDivergentRegisterIndexing( cl::desc("Use indirect register addressing for divergent indexes"), cl::init(false)); -// TODO: This option should be removed once we switch to always using PTRADD in -// the SelectionDAG. 
-static cl::opt UseSelectionDAGPTRADD( -"amdgpu-use-sdag-ptradd", cl::Hidden, -cl::desc("Generate ISD::PTRADD nodes for 64-bit pointer arithmetic in the " - "SelectionDAG ISel"), -cl::init(false)); - static bool denormalModeIsFlushAllF32(const MachineFunction &MF) { const SIMachineFunctionInfo *Info = MF.getInfo(); return Info->getMode().FP32Denormals == DenormalMode::getPreserveSign(); @@ -10599,7 +10591,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, bool SITargetLowering::shouldPreservePtrArith(const Function &F, EVT PtrVT) const { - return UseSelectionDAGPTRADD && PtrVT == MVT::i64; + return PtrVT == MVT::i64; } bool SITargetLowering::canTransformPtrArithOutOfBounds(const Function &F, diff --git a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll index 56ceba258f471..f9fcf489bd389 100644 --- a/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll +++ b/llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll @@ -6,97 +6,151 @@ define void @main(i1 %arg) #0 { ; CHECK: ; %bb.0: ; %bb ; CHECK-NEXT:s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT:s_xor_saveexec_b64 s[4:5], -1 -; CHECK-NEXT:buffer_store_dword v5, off, s[0:3], s32 ; 4-byte Folded Spill -; CHECK-NEXT:buffer_store_dword v6, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill +; CHECK-NEXT:buffer_store_dword v6, off, s[0:3], s32 ; 4-byte Folded Spill +; CHECK-NEXT:buffer_store_dword v7, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill ; CHECK-NEXT:s_mov_b64 exec, s[4:5] -; CHECK-NEXT:v_writelane_b32 v5, s30, 0 -; CHECK-NEXT:v_writelane_b32 v5, s31, 1 -; CHECK-NEXT:v_writelane_b32 v5, s36, 2 -; CHECK-NEXT:v_writelane_b32 v5, s37, 3 -; CHECK-NEXT:v_writelane_b32 v5, s38, 4 -; CHECK-NEXT:v_writelane_b32 v5, s39, 5 -; CHECK-NEXT:v_writelane_b32 v5, s48, 6 -; CHECK-NEXT:v_writelane_b32 v5, s49, 7 -; CHECK-NEXT:v_writelane_b32 v5, s50, 8 -; CHECK-NEXT:v_writelane_b32 v5, s51, 9 -; CHECK-NEXT:v_writelane_b32 v5, 
s52, 10 -; CHECK-NEXT:v_writelane_b32 v5, s53, 11 -; CHECK-NEXT:v_writelane_b32 v5, s54, 12 -; CHECK-NEXT:v_writelane_b32 v5, s55, 13 -; CHECK-NEXT:s_getpc_b64 s[24:25] -; CHECK-NEXT:v_writelane_b32 v5, s64, 14 -; CHECK-NEXT:s_movk_i32 s4, 0xf0 -; CHECK-NEXT:s_mov_b32 s5, s24 -; CHECK-NEXT:v_writelane_b32 v5, s65, 15 -; CHECK-NEXT:s_load_dwordx16 s[8:23], s[4:5], 0x0 -; CHECK-NEXT:s_mov_b64 s[4:5], 0 -; CHECK-NEXT:v_writelane_b32 v5, s66, 16 -; CHECK-NEXT:s_load_dwordx4 s[4:7], s[4:5], 0x0 -; CHECK-NEXT:v_writelane_b32 v5, s67, 17 -; CHECK-NEXT:s_waitcnt lgkmcnt(0) -; CHECK-NEXT:s_movk_i32 s6, 0x130 -; CHECK-NEXT:s_mov_b32 s7, s24 -; CHECK-NEXT:v_writela
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases (PR #145329)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145329 >From ab1e4801f616eb5ec2abcb2bdc1c00494bc834d1 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 03:51:19 -0400 Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases Pre-committing tests to show improvements in a follow-up PR. --- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 63 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 206 ++ 2 files changed, 269 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll new file mode 100644 index 0..fab56383ffa8a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF. 
+ +define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; GFX6_PTRADD-LABEL: v_add_i32: +; GFX6_PTRADD: ; %bb.0: +; GFX6_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_PTRADD-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_PTRADD-NEXT:s_mov_b32 s10, 0 +; GFX6_PTRADD-NEXT:s_mov_b32 s11, s7 +; GFX6_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_PTRADD-NEXT:v_mov_b32_e32 v1, s3 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, s2, v0 +; GFX6_PTRADD-NEXT:v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX6_PTRADD-NEXT:s_mov_b32 s8, s10 +; GFX6_PTRADD-NEXT:s_mov_b32 s9, s10 +; GFX6_PTRADD-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:s_mov_b32 s6, -1 +; GFX6_PTRADD-NEXT:s_mov_b32 s4, s0 +; GFX6_PTRADD-NEXT:s_mov_b32 s5, s1 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_PTRADD-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_PTRADD-NEXT:s_endpgm +; +; GFX6_LEGACY-LABEL: v_add_i32: +; GFX6_LEGACY: ; %bb.0: +; GFX6_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_LEGACY-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_LEGACY-NEXT:s_mov_b32 s10, 0 +; GFX6_LEGACY-NEXT:s_mov_b32 s11, s7 +; GFX6_LEGACY-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_LEGACY-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b64 s[8:9], s[2:3] +; GFX6_LEGACY-NEXT:v_mov_b32_e32 v1, 0 +; GFX6_LEGACY-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b32 s6, -1 +; GFX6_LEGACY-NEXT:s_mov_b32 s4, s0 +; GFX6_LEGACY-NEXT:s_mov_b32 s5, s1 +; GFX6_LEGACY-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_LEGACY-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_LEGACY-NEXT:s_endpgm + 
%tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 + %a = load volatile i32, ptr addrspace(1) %gep + %b = load volatile i32, ptr addrspace(1) %b_ptr + %result = add i32 %a, %b + store i32 %result, ptr addrspace(1) %out + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX6: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 34bb98550de04..0cd920616c515 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { %gep = getelementptr inbounds i8, ptr %base, i64 %mul ret ptr %gep } + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. +define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) { +; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v2, 1 +; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942_PTRAD
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases (PR #145329)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/145329 >From ab1e4801f616eb5ec2abcb2bdc1c00494bc834d1 Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Tue, 17 Jun 2025 03:51:19 -0400 Subject: [PATCH] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases Pre-committing tests to show improvements in a follow-up PR. --- llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll | 63 ++ .../AMDGPU/ptradd-sdag-optimizations.ll | 206 ++ 2 files changed, 269 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll new file mode 100644 index 0..fab56383ffa8a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-mubuf.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=1 < %s | FileCheck --check-prefixes=GFX6,GFX6_PTRADD %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti -amdgpu-use-sdag-ptradd=0 < %s | FileCheck --check-prefixes=GFX6,GFX6_LEGACY %s + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectMUBUF. 
+ +define amdgpu_kernel void @v_add_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) { +; GFX6_PTRADD-LABEL: v_add_i32: +; GFX6_PTRADD: ; %bb.0: +; GFX6_PTRADD-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_PTRADD-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_PTRADD-NEXT:s_mov_b32 s10, 0 +; GFX6_PTRADD-NEXT:s_mov_b32 s11, s7 +; GFX6_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_PTRADD-NEXT:v_mov_b32_e32 v1, s3 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, s2, v0 +; GFX6_PTRADD-NEXT:v_addc_u32_e32 v1, vcc, 0, v1, vcc +; GFX6_PTRADD-NEXT:s_mov_b32 s8, s10 +; GFX6_PTRADD-NEXT:s_mov_b32 s9, s10 +; GFX6_PTRADD-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_PTRADD-NEXT:s_waitcnt vmcnt(0) +; GFX6_PTRADD-NEXT:s_mov_b32 s6, -1 +; GFX6_PTRADD-NEXT:s_mov_b32 s4, s0 +; GFX6_PTRADD-NEXT:s_mov_b32 s5, s1 +; GFX6_PTRADD-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_PTRADD-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_PTRADD-NEXT:s_endpgm +; +; GFX6_LEGACY-LABEL: v_add_i32: +; GFX6_LEGACY: ; %bb.0: +; GFX6_LEGACY-NEXT:s_load_dwordx4 s[0:3], s[8:9], 0x0 +; GFX6_LEGACY-NEXT:s_mov_b32 s7, 0x100f000 +; GFX6_LEGACY-NEXT:s_mov_b32 s10, 0 +; GFX6_LEGACY-NEXT:s_mov_b32 s11, s7 +; GFX6_LEGACY-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX6_LEGACY-NEXT:s_waitcnt lgkmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b64 s[8:9], s[2:3] +; GFX6_LEGACY-NEXT:v_mov_b32_e32 v1, 0 +; GFX6_LEGACY-NEXT:buffer_load_dword v2, v[0:1], s[8:11], 0 addr64 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:buffer_load_dword v0, v[0:1], s[8:11], 0 addr64 offset:4 glc +; GFX6_LEGACY-NEXT:s_waitcnt vmcnt(0) +; GFX6_LEGACY-NEXT:s_mov_b32 s6, -1 +; GFX6_LEGACY-NEXT:s_mov_b32 s4, s0 +; GFX6_LEGACY-NEXT:s_mov_b32 s5, s1 +; GFX6_LEGACY-NEXT:v_add_i32_e32 v0, vcc, v2, v0 +; GFX6_LEGACY-NEXT:buffer_store_dword v0, off, s[4:7], 0 +; GFX6_LEGACY-NEXT:s_endpgm + 
%tid = call i32 @llvm.amdgcn.workitem.id.x() + %gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid + %b_ptr = getelementptr i32, ptr addrspace(1) %gep, i32 1 + %a = load volatile i32, ptr addrspace(1) %gep + %b = load volatile i32, ptr addrspace(1) %b_ptr + %result = add i32 %a, %b + store i32 %result, ptr addrspace(1) %out + ret void +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GFX6: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index 34bb98550de04..0cd920616c515 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -291,3 +291,209 @@ define ptr @fold_mul24_into_mad(ptr %base, i64 %a, i64 %b) { %gep = getelementptr inbounds i8, ptr %base, i64 %mul ret ptr %gep } + +; Test PTRADD handling in AMDGPUDAGToDAGISel::SelectGlobalSAddr. +define amdgpu_kernel void @uniform_base_varying_offset_imm(ptr addrspace(1) %p) { +; GFX942_PTRADD-LABEL: uniform_base_varying_offset_imm: +; GFX942_PTRADD: ; %bb.0: ; %entry +; GFX942_PTRADD-NEXT:s_load_dwordx2 s[0:1], s[4:5], 0x0 +; GFX942_PTRADD-NEXT:v_and_b32_e32 v0, 0x3ff, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v1, 0 +; GFX942_PTRADD-NEXT:v_lshlrev_b32_e32 v0, 2, v0 +; GFX942_PTRADD-NEXT:v_mov_b32_e32 v2, 1 +; GFX942_PTRADD-NEXT:s_waitcnt lgkmcnt(0) +; GFX942_PTRADD-NEXT:v_lshl_add_u64 v[0:1], s[0:1], 0, v[0:1] +; GFX942_PTRAD
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns (PR #143881)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/143881 >From 531b230f3a828d5f39cf0d2393d18d961d6be42d Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Thu, 12 Jun 2025 07:44:37 -0400 Subject: [PATCH] [AMDGPU][SDAG] Handle ISD::PTRADD in VOP3 patterns This patch mirrors similar patterns for ISD::ADD. The main difference is that ISD::ADD is commutative, so that a pattern definition for, e.g., (add (mul x, y), z), automatically also handles (add z, (mul x, y)). ISD::PTRADD is not commutative, so we would need to handle these cases explicitly. This patch only implements (ptradd z, (op x, y)) patterns, where the nested operation (shift or multiply) is the offset of the ptradd (i.e., the right operand), since base pointers that are the result of a shift or multiply seem less likely. For SWDEV-516125. --- llvm/lib/Target/AMDGPU/VOP3Instructions.td| 36 +++- .../AMDGPU/ptradd-sdag-optimizations.ll | 41 ++ llvm/test/CodeGen/AMDGPU/ptradd-sdag.ll | 42 +++ 3 files changed, 52 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 279de32a9cee8..4548beadf23ae 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -512,12 +512,13 @@ let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue defm: Ternary_i16_Pats_gfx9; } // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts -class ThreeOpFragSDAG : PatFrag< +class ThreeOpFragSDAG : PatFrag< (ops node:$x, node:$y, node:$z), // When the inner operation is used multiple times, selecting 3-op // instructions may still be beneficial -- if the other users can be // combined similarly. Let's be conservative for now. 
- (op2 (HasOneUseBinOp node:$x, node:$y), node:$z), + !if(op1IsRight, (op2 node:$z, (HasOneUseBinOp node:$x, node:$y)), + (op2 (HasOneUseBinOp node:$x, node:$y), node:$z)), [{ // Only use VALU ops when the result is divergent. if (!N->isDivergent()) @@ -544,7 +545,10 @@ class ThreeOpFragSDAG : PatFrag< let PredicateCodeUsesOperands = 1; } -class ThreeOpFrag : ThreeOpFragSDAG { +// Matches (op2 (op1 x, y), z) if op1IsRight = 0 and +// matches (op2 z, (op1, x, y)) if op1IsRight = 1. +class ThreeOpFrag : ThreeOpFragSDAG { // The divergence predicate is irrelevant in GlobalISel, as we have // proper register bank checks. We just need to verify the constant // bus restriction when all the sources are considered. @@ -834,12 +838,19 @@ def : GCNPat< (DivergentBinFrag i32:$src0, IsPow2Plus1:$src1), (V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>; -let SubtargetPredicate = HasLshlAddU64Inst in +let SubtargetPredicate = HasLshlAddU64Inst in { def : GCNPat< (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) >; +def : GCNPat < + // (ptradd z, (shl x, y)) -> ((x << y) + z) + (ThreeOpFrag i64:$src0, i32:$src1, i64:$src2), + (V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2) +>; +} // End SubtargetPredicate = HasLshlAddU64Inst + def : VOPBinOpClampPat; def : VOPBinOpClampPat; @@ -908,19 +919,24 @@ multiclass IMAD32_Pats { // Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul. // We need to separate this because otherwise OtherPredicates would be overriden. 
-class IMAD32_Mul24_Pat: GCNPat < -(i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)), -(inst $src0, $src1, $src2, 0 /* clamp */) ->; +class IMAD32_Mul24_Pats_Impl : GCNPat < +!if(mulIsRight, (i64 (AddOp i64:$src2, (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1, +(i64 (AddOp (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2))), +(inst $src0, $src1, $src2, 0 /* clamp */)>; + +multiclass IMAD32_Mul24_Pats { + def : IMAD32_Mul24_Pats_Impl; + def : IMAD32_Mul24_Pats_Impl; +} // exclude pre-GFX9 where it was slow let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in { defm : IMAD32_Pats; - def : IMAD32_Mul24_Pat; + defm : IMAD32_Mul24_Pats; } let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in { defm : IMAD32_Pats; - def : IMAD32_Mul24_Pat; + defm : IMAD32_Mul24_Pats; } def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3_OPSEL> { diff --git a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll index d48bfe0bb7f21..34bb98550de04 100644 --- a/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll +++ b/llvm/test/CodeGen/AMDGPU/ptradd-sdag-optimizations.ll @@ -266,18 +266,11 @@ define amdgpu_kernel void @fold_mad64(ptr addrspace(1) %p) { ; Use non-zero shift amounts in v_lshl_add_u64. define ptr @select_v_lshl_add_
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
@@ -173,6 +173,10 @@ template class CodeGenPassBuilder { // LLVMTM ctor. See TargetMachine::setGlobalISel for example. if (Opt.EnableIPRA) TM.Options.EnableIPRA = *Opt.EnableIPRA; +else { + // If not explicitly specified, use target default. + TM.Options.EnableIPRA |= TM.useIPRA(); +} cdevadas wrote: ```suggestion if (Opt.EnableIPRA) { TM.Options.EnableIPRA = *Opt.EnableIPRA; } else { // If not explicitly specified, use target default. TM.Options.EnableIPRA |= TM.useIPRA(); } ``` https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #148107)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148107 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #148109)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148109 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Add isRequired to passes missing it (PR #148115)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148115 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Clear MachineFunctions without using PA (PR #148113)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148113 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #148111)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148111 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #148112)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148112 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Clear MachineFunctions without using PA (PR #148113)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148113 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Stitch up loop passes in codegen pipeline (PR #148114)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148114 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Clear MachineFunctions without using PA (PR #148113)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148113 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #148111)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148111 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #148109)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148109 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Add isRequired to passes missing it (PR #148115)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148115 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #148107)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148107 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #148112)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148112 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #148111)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148111 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #148111)
@@ -579,8 +579,10 @@ template class CodeGenPassBuilder { void insertPass(InsertedPassT &&Pass) const { AfterCallbacks.emplace_back( [&](StringRef Name, MachineFunctionPassManager &MFPM) mutable { - if (Name == TargetPassT::name()) -MFPM.addPass(std::forward(Pass)); + if (Name == TargetPassT::name()) { +if (runBeforeAdding(InsertedPassT::name())) + MFPM.addPass(std::forward(Pass)); + } cdevadas wrote: ```suggestion if (Name == TargetPassT::name() && runBeforeAdding(InsertedPassT::name())) MFPM.addPass(std::forward(Pass)); ``` https://github.com/llvm/llvm-project/pull/148111 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Stitch up loop passes in codegen pipeline (PR #148114)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148114 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Add isRequired to passes missing it (PR #148115)
https://github.com/cdevadas approved this pull request. https://github.com/llvm/llvm-project/pull/148115 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Account inserted passes for -start/stop options (PR #148111)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148111 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][NPM] Fill in addPreSched2 passes (PR #148112)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148112 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Clear MachineFunctions without using PA (PR #148113)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148113 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #148107)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148107 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Register Function Passes (PR #148109)
https://github.com/vikramRH edited https://github.com/llvm/llvm-project/pull/148109 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 43535be - Revert "[RISCV] AddEdge between mask producer and user of V0 (#146855)"
Author: Pengcheng Wang Date: 2025-07-11T18:59:23+08:00 New Revision: 43535be8ab3f6ffadd161358823d90c713c9d7be URL: https://github.com/llvm/llvm-project/commit/43535be8ab3f6ffadd161358823d90c713c9d7be DIFF: https://github.com/llvm/llvm-project/commit/43535be8ab3f6ffadd161358823d90c713c9d7be.diff LOG: Revert "[RISCV] AddEdge between mask producer and user of V0 (#146855)" This reverts commit aee21c368b41cd5f7765a31b9dbe77f2bffadd4e. Added: Modified: llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll Removed: diff --git a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp index 5464612d86bee..be54a8c95a978 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorMaskDAGMutation.cpp @@ -10,10 +10,6 @@ // instructions and masked instructions, so that we can reduce the live range // overlaps of mask registers. // -// If there are multiple masks producers followed by multiple masked -// instructions, then at each masked instructions add dependency edges between -// every producer and masked instruction. -// // The reason why we need to do this: // 1. When tracking register pressure, we don't track physical registers. // 2. 
We have a RegisterClass for mask register (which is `VMV0`), but we don't @@ -72,25 +68,11 @@ class RISCVVectorMaskDAGMutation : public ScheduleDAGMutation { void apply(ScheduleDAGInstrs *DAG) override { SUnit *NearestUseV0SU = nullptr; -SmallVector DefMask; for (SUnit &SU : DAG->SUnits) { const MachineInstr *MI = SU.getInstr(); - if (isSoleUseCopyToV0(SU)) -DefMask.push_back(&SU); - - if (MI->findRegisterUseOperand(RISCV::V0, TRI)) { + if (MI->findRegisterUseOperand(RISCV::V0, TRI)) NearestUseV0SU = &SU; -// Copy may not be a real use, so skip it here. -if (DefMask.size() > 1 && !MI->isCopy()) - for (SUnit *Def : DefMask) -if (DAG->canAddEdge(Def, &SU)) - DAG->addEdge(Def, SDep(&SU, SDep::Artificial)); - -if (!DefMask.empty()) - DefMask.erase(DefMask.begin()); - } - if (NearestUseV0SU && NearestUseV0SU != &SU && isSoleUseCopyToV0(SU) && // For LMUL=8 cases, there will be more possibilities to spill. // FIXME: We should use RegPressureTracker to do fine-grained diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll index 2d4fce68f9545..0d8aff306252e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/combine-reduce-add-to-vcpop.ll @@ -313,12 +313,12 @@ define i32 @test_nxv128i1( %x) { ; CHECK-NEXT:vslidedown.vx v0, v6, a0 ; CHECK-NEXT:vsetvli a2, zero, e8, m1, ta, ma ; CHECK-NEXT:vslidedown.vx v6, v7, a1 -; CHECK-NEXT:vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT:vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT:vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT:vslidedown.vx v0, v7, a0 ; CHECK-NEXT:vslidedown.vx v5, v6, a0 +; CHECK-NEXT:vslidedown.vx v4, v7, a0 ; CHECK-NEXT:vsetvli a0, zero, e32, m8, ta, mu +; CHECK-NEXT:vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT:vmv1r.v v0, v4 ; CHECK-NEXT:vadd.vi v8, v8, 1, v0.t ; CHECK-NEXT:vmv1r.v v0, v5 ; CHECK-NEXT:vadd.vi v16, v16, 1, v0.t @@ -425,15 +425,13 @@ define i32 @test_nxv256i1( %x) { ; 
CHECK-NEXT:vmerge.vim v16, v8, 1, v0 ; CHECK-NEXT:vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT:vslidedown.vx v0, v5, a1 -; CHECK-NEXT:vsetvli a2, zero, e32, m8, ta, ma -; CHECK-NEXT:vmerge.vim v8, v8, 1, v0 -; CHECK-NEXT:vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT:vslidedown.vx v0, v6, a1 +; CHECK-NEXT:vslidedown.vx v5, v7, a1 +; CHECK-NEXT:vslidedown.vx v4, v6, a1 ; CHECK-NEXT:vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT:vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT:vmv1r.v v0, v4 ; CHECK-NEXT:vadd.vi v8, v8, 1, v0.t -; CHECK-NEXT:vsetvli a2, zero, e8, mf2, ta, ma -; CHECK-NEXT:vslidedown.vx v0, v7, a1 -; CHECK-NEXT:vsetvli a2, zero, e32, m8, ta, mu +; CHECK-NEXT:vmv1r.v v0, v5 ; CHECK-NEXT:vadd.vi v16, v16, 1, v0.t ; CHECK-NEXT:vadd.vv v8, v16, v8 ; CHECK-NEXT:addi a2, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 15417da962bd3..796f8dde58f47 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extracte
[llvm-branch-commits] [flang] [flang][OpenMP] Use OmpDirectiveSpecification in DISPATCH (PR #148008)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/148008 >From 1bcd318939236190a30cfb3259bcb9ca972f1fd3 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 10 Jul 2025 10:18:32 -0500 Subject: [PATCH] [flang][OpenMP] Use OmpDirectiveSpecification in DISPATCH Dispatch is the last construct (after ATOMIC and ALLOCATORS) where the associated block requires a specific form. Using OmpDirectiveSpecification for the begin and the optional end directives will make the structure of all block directives more uniform. --- flang/include/flang/Parser/dump-parse-tree.h | 2 - flang/include/flang/Parser/parse-tree.h | 12 +--- flang/lib/Parser/openmp-parsers.cpp | 36 +++--- flang/lib/Parser/unparse.cpp | 12 +--- flang/lib/Semantics/check-omp-structure.cpp | 25 --- flang/test/Parser/OpenMP/dispatch.f90| 73 flang/test/Semantics/OpenMP/dispatch.f90 | 22 +++--- 7 files changed, 99 insertions(+), 83 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 73c224e3ad235..32b6ca45609b6 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -710,8 +710,6 @@ class ParseTreeDumper { NODE(parser, OpenMPDepobjConstruct) NODE(parser, OpenMPUtilityConstruct) NODE(parser, OpenMPDispatchConstruct) - NODE(parser, OmpDispatchDirective) - NODE(parser, OmpEndDispatchDirective) NODE(parser, OpenMPFlushConstruct) NODE(parser, OpenMPLoopConstruct) NODE(parser, OpenMPExecutableAllocate) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index fbc39286a95bf..ab2dde7d5dfbe 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -4939,19 +4939,11 @@ struct OpenMPDepobjConstruct { //nocontext-clause | //novariants-clause | //nowait-clause -struct OmpDispatchDirective { - TUPLE_CLASS_BOILERPLATE(OmpDispatchDirective); - CharBlock source; - std::tuple t; -}; - 
-EMPTY_CLASS(OmpEndDispatchDirective); - struct OpenMPDispatchConstruct { TUPLE_CLASS_BOILERPLATE(OpenMPDispatchConstruct); CharBlock source; - std::tuple> + std::tuple> t; }; diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 811ca2c855a6e..d70aaab82cbab 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -1302,6 +1302,32 @@ TYPE_PARSER(sourced( // construct( "ALLOCATORS"_tok >= OmpAllocatorsConstructParser{}))) +struct OmpDispatchConstructParser { + using resultType = OpenMPDispatchConstruct; + + std::optional Parse(ParseState &state) const { +auto dirSpec{Parser{}.Parse(state)}; +if (!dirSpec || dirSpec->DirId() != llvm::omp::Directive::OMPD_dispatch) { + return std::nullopt; +} + +// This should be a function call. That will be checked in semantics. +Block block; +if (auto stmt{attempt(Parser{}).Parse(state)}) { + block.emplace_back(std::move(*stmt)); +} +// Allow empty block. Check for this in semantics. + +auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_dispatch}}; +return OpenMPDispatchConstruct{ +std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + } +}; + +TYPE_PARSER(sourced( // +construct( +"DISPATCH"_tok >= OmpDispatchConstructParser{}))) + // Parser for an arbitrary OpenMP ATOMIC construct. 
// // Depending on circumstances, an ATOMIC construct applies to one or more @@ -1631,16 +1657,6 @@ TYPE_PARSER(sourced(construct(verbatim("CRITICAL"_tok), TYPE_PARSER(construct( Parser{}, block, Parser{})) -TYPE_PARSER(sourced(construct( -verbatim("DISPATCH"_tok), Parser{}))) - -TYPE_PARSER( -construct(startOmpLine >> "END DISPATCH"_tok)) - -TYPE_PARSER(sourced(construct( -Parser{} / endOmpLine, block, -maybe(Parser{} / endOmpLine - // 2.11.3 Executable Allocate directive TYPE_PARSER( sourced(construct(verbatim("ALLOCATE"_tok), diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 4f692f3e9084f..b66d756bdbf2c 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2758,6 +2758,9 @@ class UnparseVisitor { Put("\n"); EndOpenMP(); } + void Unparse(const OpenMPDispatchConstruct &x) { // +UnparseBlockConstruct(x); + } void Unparse(const OpenMPRequiresConstruct &y) { BeginOpenMP(); Word("!$OMP REQUIRES "); @@ -2777,15 +2780,6 @@ class UnparseVisitor { Walk(x.v); return false; } - void Unparse(const OmpDispatchDirective &x) { -Word("!$OMP DISPATCH"); -Walk(x.t); -Put("\n"); - } - void Unparse(const OmpEndDispatchDirective &) { -Word("!$OMP END DISPATCH"); -Put("\n"); - } void Unparse(const OmpErrorDirective &x) {
[llvm-branch-commits] [clang] [LifetimeSafety] Add script for performance benchmarking (PR #147315)
https://github.com/usx95 updated https://github.com/llvm/llvm-project/pull/147315 >From 7d9009c2f22bf3f0980f7fd811be3185192490cf Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Mon, 7 Jul 2025 15:13:00 + Subject: [PATCH] [LifetimeSafety] Add script performance benchmarking --- .../Analysis/LifetimeSafety/CMakeLists.txt| 49 +++ .../test/Analysis/LifetimeSafety/benchmark.py | 307 ++ .../Analysis/LifetimeSafety/requirements.txt | 2 + clang/test/CMakeLists.txt | 2 + 4 files changed, 360 insertions(+) create mode 100644 clang/test/Analysis/LifetimeSafety/CMakeLists.txt create mode 100644 clang/test/Analysis/LifetimeSafety/benchmark.py create mode 100644 clang/test/Analysis/LifetimeSafety/requirements.txt diff --git a/clang/test/Analysis/LifetimeSafety/CMakeLists.txt b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt new file mode 100644 index 0..ce37a29655668 --- /dev/null +++ b/clang/test/Analysis/LifetimeSafety/CMakeLists.txt @@ -0,0 +1,49 @@ +# = +# Lifetime Analysis Benchmarking Target +# = +# This target allows running performance benchmarks for the clang lifetime analysis +# using a Python script (with managed dependencies). + +find_package(Python3 COMPONENTS Interpreter REQUIRED) + +# Define paths for the virtual environment and requirements file. 
+set(LIFETIME_BENCHMARK_SCRIPT + "${CMAKE_CURRENT_SOURCE_DIR}/benchmark.py") +set(LIFETIME_BENCHMARK_VENV_DIR "${CMAKE_CURRENT_BINARY_DIR}/benchmark-venv") +set(LIFETIME_BENCHMARK_REQUIREMENTS + "${CMAKE_CURRENT_SOURCE_DIR}/requirements.txt") +set(LIFETIME_BENCHMARK_OUTPUT_DIR + "${CMAKE_CURRENT_BINARY_DIR}/benchmark_results") + + +if(EXISTS ${LIFETIME_BENCHMARK_SCRIPT} AND EXISTS ${LIFETIME_BENCHMARK_REQUIREMENTS}) + + # Set up the virtual environment and install packages + add_custom_command( +OUTPUT ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg +COMMAND ${Python3_EXECUTABLE} -m venv ${LIFETIME_BENCHMARK_VENV_DIR} +COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python -m pip install -r ${LIFETIME_BENCHMARK_REQUIREMENTS} +DEPENDS ${LIFETIME_BENCHMARK_REQUIREMENTS} +COMMENT "Creating Python virtual environment and installing dependencies for benchmark..." + ) + add_custom_target(benchmark_venv_setup +DEPENDS ${LIFETIME_BENCHMARK_VENV_DIR}/pyvenv.cfg + ) + + # Main benchmark target + add_custom_target(benchmark_lifetime_safety_analysis +COMMAND ${LIFETIME_BENCHMARK_VENV_DIR}/bin/python ${LIFETIME_BENCHMARK_SCRIPT} +--clang-binary ${LLVM_BINARY_DIR}/bin/clang +--output-dir ${LIFETIME_BENCHMARK_OUTPUT_DIR} + +DEPENDS clang benchmark_venv_setup + +# Display the output directly in the console. +USES_TERMINAL + +COMMENT "Running Lifetime Analysis performance benchmarks..." 
+ ) + + set_target_properties(benchmark_lifetime_safety_analysis +PROPERTIES FOLDER "Clang/Benchmarks") +endif() diff --git a/clang/test/Analysis/LifetimeSafety/benchmark.py b/clang/test/Analysis/LifetimeSafety/benchmark.py new file mode 100644 index 0..9d5f36c51b9ee --- /dev/null +++ b/clang/test/Analysis/LifetimeSafety/benchmark.py @@ -0,0 +1,307 @@ +import sys +import argparse +import subprocess +import tempfile +import json +import os +from datetime import datetime +import numpy as np +from scipy.optimize import curve_fit +from scipy.stats import t + + +def generate_cpp_cycle_test(n: int) -> str: +""" +Generates a C++ code snippet with a specified number of pointers in a cycle. +Creates a while loop that rotates N pointers. +This pattern tests the convergence speed of the dataflow analysis when +reaching its fixed point. + +Example: +struct MyObj { int id; ~MyObj() {} }; + +void long_cycle_4(bool condition) { +MyObj v1{1}; +MyObj v2{1}; +MyObj v3{1}; +MyObj v4{1}; + +MyObj* p1 = &v1; +MyObj* p2 = &v2; +MyObj* p3 = &v3; +MyObj* p4 = &v4; + +while (condition) { +MyObj* temp = p1; +p1 = p2; +p2 = p3; +p3 = p4; +p4 = temp; +} +} +""" +if n <= 0: +return "// Number of variables must be positive." + +cpp_code = "struct MyObj { int id; ~MyObj() {} };\n\n" +cpp_code += f"void long_cycle_{n}(bool condition) {{\n" +for i in range(1, n + 1): +cpp_code += f" MyObj v{i}{{1}};\n" +cpp_code += "\n" +for i in range(1, n + 1): +cpp_code += f" MyObj* p{i} = &v{i};\n" + +cpp_code += "\n while (condition) {\n" +if n > 0: +cpp_code += f"MyObj* temp = p1;\n" +for i in range(1, n): +cpp_code += f"p{i} = p{i+1};\n" +cpp_code += f"
[llvm-branch-commits] [clang] [LifetimeSafety] Add expired loans analysis (PR #148222)
https://github.com/usx95 created https://github.com/llvm/llvm-project/pull/148222 None >From 14a9c8b50df11ce48ce15d0fbe29568b3e23b5a6 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Fri, 11 Jul 2025 11:11:47 + Subject: [PATCH] [LifetimeSafety] Add expired loans analysis --- clang/lib/Analysis/LifetimeSafety.cpp | 140 ++ 1 file changed, 140 insertions(+) diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index e72192aa92c1a..88ec70d000d2c 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -735,6 +735,142 @@ class LifetimeDataflow { } }; +// = // +// Expired Loans Analysis +// = // + +/// The lattice for tracking expired loans. It is a set of loan IDs. +struct ExpiredLattice { + LoanSet Expired; + + ExpiredLattice() = default; + explicit ExpiredLattice(LoanSet S) : Expired(S) {} + + bool operator==(const ExpiredLattice &Other) const { +return Expired == Other.Expired; + } + bool operator!=(const ExpiredLattice &Other) const { +return !(*this == Other); + } + + /// Computes the union of two lattices. + ExpiredLattice join(const ExpiredLattice &Other, + LoanSet::Factory &Factory) const { +LoanSet JoinedSet = Expired; +for (LoanID LID : Other.Expired) + JoinedSet = Factory.add(JoinedSet, LID); +return ExpiredLattice(JoinedSet); + } + + void dump(llvm::raw_ostream &OS) const { +OS << "ExpiredLattice State:\n"; +if (Expired.isEmpty()) + OS << " \n"; +for (const LoanID &LID : Expired) + OS << " Loan " << LID << " is expired\n"; + } +}; + +/// Transfer function for the expired loans analysis. +class ExpiredLoansTransferer { + FactManager &AllFacts; + LoanSet::Factory &SetFactory; + +public: + explicit ExpiredLoansTransferer(FactManager &F, LoanSet::Factory &SF) + : AllFacts(F), SetFactory(SF) {} + + /// Computes the exit state of a block by applying all its facts sequentially + /// to a given entry state. 
+ ExpiredLattice transferBlock(const CFGBlock *Block, +ExpiredLattice EntryState) { +ExpiredLattice BlockState = EntryState; +llvm::ArrayRef Facts = AllFacts.getFacts(Block); + +for (const Fact *F : Facts) { + BlockState = transferFact(BlockState, F); +} +return BlockState; + } + +private: + ExpiredLattice transferFact(ExpiredLattice In, const Fact *F) { +if (const auto *EF = F->getAs()) + return ExpiredLattice(SetFactory.add(In.Expired, EF->getLoanID())); + +if (const auto *IF = F->getAs()) + return ExpiredLattice(SetFactory.remove(In.Expired, IF->getLoanID())); + +return In; + } +}; + +/// Dataflow analysis driver for tracking expired loans. +class ExpiredLoansAnalysis { + const CFG &Cfg; + AnalysisDeclContext &AC; + LoanSet::Factory SetFactory; + ExpiredLoansTransferer Xfer; + + llvm::DenseMap BlockEntryStates; + llvm::DenseMap BlockExitStates; + +public: + ExpiredLoansAnalysis(const CFG &C, FactManager &FS, AnalysisDeclContext &AC) + : Cfg(C), AC(AC), Xfer(FS, SetFactory) {} + + void run() { +llvm::TimeTraceScope TimeProfile("Expired Loans Analysis"); +ForwardDataflowWorklist Worklist(Cfg, AC); +const CFGBlock *Entry = &Cfg.getEntry(); +BlockEntryStates[Entry] = ExpiredLattice(SetFactory.getEmptySet()); +Worklist.enqueueBlock(Entry); +while (const CFGBlock *B = Worklist.dequeue()) { + ExpiredLattice EntryState = getEntryState(B); + ExpiredLattice ExitState = Xfer.transferBlock(B, EntryState); + BlockExitStates[B] = ExitState; + + for (const CFGBlock *Successor : B->succs()) { +auto SuccIt = BlockEntryStates.find(Successor); +ExpiredLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) +? 
SuccIt->second +: ExpiredLattice{}; +ExpiredLattice NewSuccEntryState = +OldSuccEntryState.join(ExitState, SetFactory); +if (SuccIt == BlockEntryStates.end() || +NewSuccEntryState != OldSuccEntryState) { + BlockEntryStates[Successor] = NewSuccEntryState; + Worklist.enqueueBlock(Successor); +} + } +} + } + + void dump() const { +llvm::dbgs() << "==\n"; +llvm::dbgs() << " Expired Loans Results:\n"; +llvm::dbgs() << "==\n"; +const CFGBlock &B = Cfg.getExit(); +getExitState(&B).dump(llvm::dbgs()); + } + + ExpiredLattice getEntryState(const CFGBlock *B) const { +auto It = BlockEntryStates.find(B); +if (It != BlockEntryStates.end()) { + return It->second; +} +return ExpiredLattice(SetFac
[llvm-branch-commits] [clang] [LifetimeSafety] Add expired loans analysis (PR #148222)
usx95 wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/148222?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#148222** https://app.graphite.dev/github/pr/llvm/llvm-project/148222?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 https://app.graphite.dev/github/pr/llvm/llvm-project/148222?utm_source=stack-comment-view-in-graphite"; target="_blank">(View in Graphite) * **#148065** https://app.graphite.dev/github/pr/llvm/llvm-project/148065?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>: 1 other dependent PR ([#147315](https://github.com/llvm/llvm-project/pull/147315) https://app.graphite.dev/github/pr/llvm/llvm-project/147315?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/>) * `main` This stack of pull requests is managed by https://graphite.dev?utm-source=stack-comment";>Graphite. Learn more about https://stacking.dev/?utm_source=stack-comment";>stacking. https://github.com/llvm/llvm-project/pull/148222 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [LifetimeSafety] Add expired loans analysis (PR #148222)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff HEAD~1 HEAD --extensions cpp -- clang/lib/Analysis/LifetimeSafety.cpp `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Analysis/LifetimeSafety.cpp b/clang/lib/Analysis/LifetimeSafety.cpp index 88ec70d00..d95ffcca7 100644 --- a/clang/lib/Analysis/LifetimeSafety.cpp +++ b/clang/lib/Analysis/LifetimeSafety.cpp @@ -783,7 +783,7 @@ public: /// Computes the exit state of a block by applying all its facts sequentially /// to a given entry state. ExpiredLattice transferBlock(const CFGBlock *Block, -ExpiredLattice EntryState) { + ExpiredLattice EntryState) { ExpiredLattice BlockState = EntryState; llvm::ArrayRef Facts = AllFacts.getFacts(Block); @@ -833,8 +833,8 @@ public: for (const CFGBlock *Successor : B->succs()) { auto SuccIt = BlockEntryStates.find(Successor); ExpiredLattice OldSuccEntryState = (SuccIt != BlockEntryStates.end()) -? SuccIt->second -: ExpiredLattice{}; + ? SuccIt->second + : ExpiredLattice{}; ExpiredLattice NewSuccEntryState = OldSuccEntryState.join(ExitState, SetFactory); if (SuccIt == BlockEntryStates.end() || `` https://github.com/llvm/llvm-project/pull/148222 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Use OmpDirectiveSpecification in DISPATCH (PR #148008)
https://github.com/Stylie777 edited https://github.com/llvm/llvm-project/pull/148008 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Use OmpDirectiveSpecification in DISPATCH (PR #148008)
https://github.com/Stylie777 approved this pull request. LGTM. One small nit comment but not critical. https://github.com/llvm/llvm-project/pull/148008 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][OpenMP] Use OmpDirectiveSpecification in DISPATCH (PR #148008)
@@ -1302,6 +1302,32 @@ TYPE_PARSER(sourced( // construct( "ALLOCATORS"_tok >= OmpAllocatorsConstructParser{}))) +struct OmpDispatchConstructParser { + using resultType = OpenMPDispatchConstruct; + + std::optional Parse(ParseState &state) const { +auto dirSpec{Parser{}.Parse(state)}; +if (!dirSpec || dirSpec->DirId() != llvm::omp::Directive::OMPD_dispatch) { + return std::nullopt; +} + +// This should be a function call. That will be checked in semantics. +Block block; +if (auto stmt{attempt(Parser{}).Parse(state)}) { + block.emplace_back(std::move(*stmt)); +} +// Allow empty block. Check for this in semantics. + +auto end{OmpEndDirectiveParser{llvm::omp::Directive::OMPD_dispatch}}; +return OpenMPDispatchConstruct{ +std::move(*dirSpec), std::move(block), *maybe(end).Parse(state)}; + } +}; + +TYPE_PARSER(sourced( // Stylie777 wrote: nit: Extra `//` https://github.com/llvm/llvm-project/pull/148008 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/148241 None >From 3596b1726514dda5942d99d4779852e01367c764 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Fri, 11 Jul 2025 13:10:44 + Subject: [PATCH] [DA] Add check for base pointer invariance --- llvm/lib/Analysis/DependenceAnalysis.cpp | 16 .../DependenceAnalysis/FlipFlopBaseAddress.ll | 18 +- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 428342f51ad2e..07bf560772c8c 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,18 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. 
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { +LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); +return std::make_unique(Src, Dst, +SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 843c18a6e0d1e..a357018563be1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float 0.00e+00, ptr %q, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! 
; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: br label %for.cond @@ -173,7 +173,7 @@ for.end:
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
llvmbot wrote: @llvm/pr-subscribers-llvm-analysis Author: Ryotaro Kasuga (kasuga-fj) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/148241.diff 2 Files Affected: - (modified) llvm/lib/Analysis/DependenceAnalysis.cpp (+16) - (modified) llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll (+9-9) ``diff diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 428342f51ad2e..07bf560772c8c 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,18 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. 
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { +LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); +return std::make_unique(Src, Dst, +SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 843c18a6e0d1e..a357018563be1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float 0.00e+00, ptr %q, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! 
; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: br label %for.cond @@ -173,7 +173,7 @@ for.end: ; preds = %for.cond.cleanup define void @non_invariant_baseptr_with_identical_obj(ptr %A) {
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/148241 >From d914ea5f3c44387570cab65ce9a507ebf429f827 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Fri, 11 Jul 2025 13:10:44 + Subject: [PATCH] [DA] Add check for base pointer invariance --- llvm/lib/Analysis/DependenceAnalysis.cpp | 16 .../DependenceAnalysis/FlipFlopBaseAddress.ll | 18 +- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 428342f51ad2e..07bf560772c8c 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,18 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. 
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { +LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); +return std::make_unique(Src, Dst, +SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 3e3426afab0f7..52cab0f77e73e 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float 0.00e+00, ptr %q, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! 
; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: br label %for.cond @@ -173,7 +173,7 @@ for.end:
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj edited https://github.com/llvm/llvm-project/pull/148241 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Offload] Add `olGetSymbolInfo[Size]` (PR #147962)
jhuber6 wrote: > @jhuber6 Given that this interface matches the interface of other handles, > any change to how it fundamentally works should probably involve updating all > other getInfo queries. If we do decide to replace the Size variants, I think > that should be done as a separate task that touches everything. > > For now, I think it makes sense to match other handle points, and leave any > refactors for device info as a separate change that touches everything. Sure, we can get in and then rework all the get info's to just have a separate query for the size. I think that's much cleaner and keeps the number of API functions more clear. I'll accept this for now but we should definitely do that later. https://github.com/llvm/llvm-project/pull/147962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Offload] Add `olGetSymbolInfo[Size]` (PR #147962)
https://github.com/jhuber6 approved this pull request. https://github.com/llvm/llvm-project/pull/147962 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj edited https://github.com/llvm/llvm-project/pull/148241 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Add check for base pointer invariance (PR #148241)
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/148241 >From d914ea5f3c44387570cab65ce9a507ebf429f827 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Fri, 11 Jul 2025 13:10:44 + Subject: [PATCH 1/2] [DA] Add check for base pointer invariance --- llvm/lib/Analysis/DependenceAnalysis.cpp | 16 .../DependenceAnalysis/FlipFlopBaseAddress.ll | 18 +- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 428342f51ad2e..07bf560772c8c 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3383,6 +3383,10 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst, SrcSubscripts, DstSubscripts)) return false; + assert(isLoopInvariant(SrcBase, SrcLoop) && + isLoopInvariant(DstBase, DstLoop) && + "Expected SrcBase and DstBase to be loop invariant"); + int Size = SrcSubscripts.size(); LLVM_DEBUG({ dbgs() << "\nSrcSubscripts: "; @@ -3666,6 +3670,18 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst, SCEVUnionPredicate(Assume, *SE)); } + // Even if the base pointers are the same, they may not be loop-invariant. It + // could lead to incorrect results, as we're analyzing loop-carried + // dependencies. 
+ Loop *SrcLoop = LI->getLoopFor(Src->getParent()); + Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + if (!isLoopInvariant(SrcBase, SrcLoop) || + !isLoopInvariant(DstBase, DstLoop)) { +LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n"); +return std::make_unique(Src, Dst, +SCEVUnionPredicate(Assume, *SE)); + } + uint64_t EltSize = SrcLoc.Size.toRaw(); const SCEV *SrcEv = SE->getMinusSCEV(SrcSCEV, SrcBase); const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase); diff --git a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll index 3e3426afab0f7..52cab0f77e73e 100644 --- a/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll +++ b/llvm/test/Analysis/DependenceAnalysis/FlipFlopBaseAddress.ll @@ -8,11 +8,11 @@ define float @bug41488_test1(float %f) { ; CHECK-LABEL: 'bug41488_test1' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float %f, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float %f, ptr %q, align 4 --> Dst: store float %f, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %g = alloca float, align 4 @@ -34,11 +34,11 @@ for.cond.cleanup: define void @bug41488_test2(i32 %n) { ; CHECK-LABEL: 'bug41488_test2' ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: %0 = load float, ptr %p, align 4 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load float, ptr %p, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store float 0.00e+00, ptr %q, align 4 --> Dst: store float 0.00e+00, ptr %q, align 4 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! 
; entry: %g = alloca float, align 4 @@ -68,7 +68,7 @@ define void @bug53942_foo(i32 noundef %n, ptr noalias nocapture noundef writeonl ; CHECK-NEXT: Src: %.pre = load double, ptr %B, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %.pre, ptr %arrayidx2, align 8 --> Dst: store double %.pre, ptr %arrayidx2, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: %cmp8 = icmp sgt i32 %n, 1 @@ -99,11 +99,11 @@ for.body: ; preds = %for.body.preheader, define void @bug53942_bar(i32 noundef %n, ptr noalias noundef %A, ptr noalias noundef %B) { ; CHECK-LABEL: 'bug53942_bar' ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: %0 = load double, ptr %arrayidx, align 8 -; CHECK-NEXT:da analyze - input [*]! +; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: %0 = load double, ptr %arrayidx, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 ; CHECK-NEXT:da analyze - confused! ; CHECK-NEXT: Src: store double %0, ptr %arrayidx8, align 8 --> Dst: store double %0, ptr %arrayidx8, align 8 -; CHECK-NEXT:da analyze - output [*]! +; CHECK-NEXT:da analyze - confused! ; entry: br label %for.cond @@ -173,7 +173,7 @@ for.end:
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
@@ -0,0 +1,28 @@ +//===- llvm/CodeGen/ProcessImplicitDefs.h ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// + +#ifndef LLVM_CODEGEN_PROCESSIMPLICITDEFS_H +#define LLVM_CODEGEN_PROCESSIMPLICITDEFS_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class ProcessImplicitDefsPass : public PassInfoMixin<ProcessImplicitDefsPass> { +public: + PreservedAnalyses run(MachineFunction &MF, +MachineFunctionAnalysisManager &MFAM); + MachineFunctionProperties getRequiredProperties() const { +return MachineFunctionProperties().set( +MachineFunctionProperties::Property::IsSSA); s-barannikov wrote: ```suggestion return MachineFunctionProperties().setIsSSA(); ``` https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AMDGPU][SDAG] Test ISD::PTRADD handling in various special cases (PR #145329)
https://github.com/arsenm approved this pull request. https://github.com/llvm/llvm-project/pull/145329 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Port ProcessImplicitDefs to NPM (PR #148110)
arsenm wrote: tag format is wrong, missing email https://github.com/llvm/llvm-project/pull/148110 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Offload] Add global variable address/size queries (PR #147972)
https://github.com/RossBrunton updated https://github.com/llvm/llvm-project/pull/147972 >From 77a4183117cd259584c1bb4136aa27dd2b9548b0 Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 10 Jul 2025 15:34:17 +0100 Subject: [PATCH] [Offload] Add global variable address/size queries Add two new symbol info types for getting the bounds of a global variable. As well as a number of tests for reading/writing to it. --- offload/liboffload/API/Symbol.td | 4 +- offload/liboffload/src/OffloadImpl.cpp| 19 offload/tools/offload-tblgen/PrintGen.cpp | 8 +- .../unittests/OffloadAPI/memory/olMemcpy.cpp | 105 ++ .../OffloadAPI/symbol/olGetSymbolInfo.cpp | 28 + .../OffloadAPI/symbol/olGetSymbolInfoSize.cpp | 14 +++ 6 files changed, 175 insertions(+), 3 deletions(-) diff --git a/offload/liboffload/API/Symbol.td b/offload/liboffload/API/Symbol.td index 9317c71df1f10..2e94d703809e7 100644 --- a/offload/liboffload/API/Symbol.td +++ b/offload/liboffload/API/Symbol.td @@ -39,7 +39,9 @@ def : Enum { let desc = "Supported symbol info."; let is_typed = 1; let etors = [ -TaggedEtor<"KIND", "ol_symbol_kind_t", "The kind of this symbol."> +TaggedEtor<"KIND", "ol_symbol_kind_t", "The kind of this symbol.">, +TaggedEtor<"GLOBAL_VARIABLE_ADDRESS", "void *", "The address in memory for this global variable.">, +TaggedEtor<"GLOBAL_VARIABLE_SIZE", "size_t", "The size in bytes for this global variable.">, ]; } diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 6d98c33ffb8da..17a2b00cb7140 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -753,9 +753,28 @@ Error olGetSymbolInfoImplDetail(ol_symbol_handle_t Symbol, void *PropValue, size_t *PropSizeRet) { InfoWriter Info(PropSize, PropValue, PropSizeRet); + auto CheckKind = [&](ol_symbol_kind_t Required) { +if (Symbol->Kind != Required) { + std::string ErrBuffer; + llvm::raw_string_ostream(ErrBuffer) + << PropName << ": Expected a symbol of Kind " << 
Required + << " but given a symbol of Kind " << Symbol->Kind; + return Plugin::error(ErrorCode::SYMBOL_KIND, ErrBuffer.c_str()); +} +return Plugin::success(); + }; + switch (PropName) { case OL_SYMBOL_INFO_KIND: return Info.write(Symbol->Kind); + case OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS: +if (auto Err = CheckKind(OL_SYMBOL_KIND_GLOBAL_VARIABLE)) + return Err; +return Info.write(std::get(Symbol->PluginImpl).getPtr()); + case OL_SYMBOL_INFO_GLOBAL_VARIABLE_SIZE: +if (auto Err = CheckKind(OL_SYMBOL_KIND_GLOBAL_VARIABLE)) + return Err; +return Info.write(std::get(Symbol->PluginImpl).getSize()); default: return createOffloadError(ErrorCode::INVALID_ENUMERATION, "olGetSymbolInfo enum '%i' is invalid", PropName); diff --git a/offload/tools/offload-tblgen/PrintGen.cpp b/offload/tools/offload-tblgen/PrintGen.cpp index d1189688a90a3..89d7c820426cf 100644 --- a/offload/tools/offload-tblgen/PrintGen.cpp +++ b/offload/tools/offload-tblgen/PrintGen.cpp @@ -74,8 +74,12 @@ inline void printTagged(llvm::raw_ostream &os, const void *ptr, {0} value, size_ if (Type == "char[]") { OS << formatv(TAB_2 "printPtr(os, (const char*) ptr);\n"); } else { - OS << formatv(TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", -Type); + if (Type == "void *") +OS << formatv(TAB_2 "void * const * const tptr = (void * " +"const * const)ptr;\n"); + else +OS << formatv( +TAB_2 "const {0} * const tptr = (const {0} * const)ptr;\n", Type); // TODO: Handle other cases here OS << TAB_2 "os << (const void *)tptr << \" (\";\n"; if (Type.ends_with("*")) { diff --git a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp index c1762b451b81d..c1fb6df9bad0d 100644 --- a/offload/unittests/OffloadAPI/memory/olMemcpy.cpp +++ b/offload/unittests/OffloadAPI/memory/olMemcpy.cpp @@ -13,6 +13,32 @@ using olMemcpyTest = OffloadQueueTest; OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemcpyTest); +struct olMemcpyGlobalTest : OffloadGlobalTest { + void SetUp() override { 
+RETURN_ON_FATAL_FAILURE(OffloadGlobalTest::SetUp()); +ASSERT_SUCCESS( +olGetSymbol(Program, "read", OL_SYMBOL_KIND_KERNEL, &ReadKernel)); +ASSERT_SUCCESS( +olGetSymbol(Program, "write", OL_SYMBOL_KIND_KERNEL, &WriteKernel)); +ASSERT_SUCCESS(olCreateQueue(Device, &Queue)); +ASSERT_SUCCESS(olGetSymbolInfo( +Global, OL_SYMBOL_INFO_GLOBAL_VARIABLE_ADDRESS, sizeof(Addr), &Addr)); + +LaunchArgs.Dimensions = 1; +LaunchArgs.GroupSize = {64, 1, 1}; +LaunchArgs.NumGroups = {1, 1, 1}; + +LaunchArgs.DynSharedMemory = 0; + } + + ol_kernel
[llvm-branch-commits] [clang] [CIR] Upstream ComplexImagPtrOp for ComplexType (PR #144236)
@@ -1775,6 +1775,44 @@ OpFoldResult cir::ComplexCreateOp::fold(FoldAdaptor adaptor) { return cir::ConstComplexAttr::get(realAttr, imagAttr); } +//===--===// +// ComplexRealPtrOp +//===--===// + +LogicalResult cir::ComplexRealPtrOp::verify() { + mlir::Type resultPointeeTy = getType().getPointee(); + cir::PointerType operandPtrTy = getOperand().getType(); + auto operandPointeeTy = + mlir::cast<cir::ComplexType>(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementType()) { +emitOpError() +<< "cir.complex.real_ptr result type does not match operand type"; +return failure(); xlauko wrote: ```suggestion return emitOpError() << "cir.complex.real_ptr result type does not match operand type"; ``` https://github.com/llvm/llvm-project/pull/144236 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR] Upstream ComplexImagPtrOp for ComplexType (PR #144236)
@@ -2385,4 +2385,62 @@ def ComplexCreateOp : CIR_Op<"complex.create", [Pure, SameTypeOperands]> { let hasFolder = 1; } +//===--===// +// ComplexRealPtrOp +//===--===// + +def ComplexRealPtrOp : CIR_Op<"complex.real_ptr", [Pure]> { + let summary = "Derive a pointer to the real part of a complex value"; + let description = [{ +`cir.complex.real_ptr` operation takes a pointer operand that points to a +complex value of type `!cir.complex` and yields a pointer to the real part +of the operand. + +Example: + +```mlir +%1 = cir.complex.real_ptr %0 : !cir.ptr> -> !cir.ptr +``` + }]; + + let results = (outs CIR_PtrToIntOrFloatType:$result); + let arguments = (ins CIR_PtrToComplexType:$operand); + + let assemblyFormat = [{ +$operand `:` +qualified(type($operand)) `->` qualified(type($result)) attr-dict + }]; + + let hasVerifier = 1; +} + +//===--===// +// ComplexImagPtrOp +//===--===// + +def ComplexImagPtrOp : CIR_Op<"complex.imag_ptr", [Pure]> { + let summary = "Derive a pointer to the imaginary part of a complex value"; + let description = [{ +`cir.complex.imag_ptr` operation takes a pointer operand that points to a +complex value of type `!cir.complex` and yields a pointer to the imaginary +part of the operand. + +Example: + +```mlir +%1 = cir.complex.imag_ptr %0 : !cir.ptr> -> !cir.ptr xlauko wrote: fix to 80 cols https://github.com/llvm/llvm-project/pull/144236 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR] Upstream ComplexImagPtrOp for ComplexType (PR #144236)
@@ -1775,6 +1775,44 @@ OpFoldResult cir::ComplexCreateOp::fold(FoldAdaptor adaptor) { return cir::ConstComplexAttr::get(realAttr, imagAttr); } +//===--===// +// ComplexRealPtrOp +//===--===// + +LogicalResult cir::ComplexRealPtrOp::verify() { + mlir::Type resultPointeeTy = getType().getPointee(); + cir::PointerType operandPtrTy = getOperand().getType(); + auto operandPointeeTy = + mlir::cast<cir::ComplexType>(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementType()) { +emitOpError() +<< "cir.complex.real_ptr result type does not match operand type"; +return failure(); + } + + return success(); +} + +//===--===// +// ComplexImagPtrOp +//===--===// + +LogicalResult cir::ComplexImagPtrOp::verify() { + mlir::Type resultPointeeTy = getType().getPointee(); + cir::PointerType operandPtrTy = getOperand().getType(); + auto operandPointeeTy = + mlir::cast<cir::ComplexType>(operandPtrTy.getPointee()); + + if (resultPointeeTy != operandPointeeTy.getElementType()) { +emitOpError() +<< "cir.complex.imag_ptr result type does not match operand type"; +return failure(); xlauko wrote: ```suggestion return emitOpError() << "cir.complex.imag_ptr result type does not match operand type"; ``` https://github.com/llvm/llvm-project/pull/144236 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [CIR] Upstream ComplexImagPtrOp for ComplexType (PR #144236)
@@ -1775,6 +1775,44 @@ OpFoldResult cir::ComplexCreateOp::fold(FoldAdaptor adaptor) { return cir::ConstComplexAttr::get(realAttr, imagAttr); } +//===--===// xlauko wrote: These are copy/paste. Can you extract one common implementation. https://github.com/llvm/llvm-project/pull/144236 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/142391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] Read TargetMachine's EnableIPRA option (PR #148108)
https://github.com/optimisan approved this pull request. https://github.com/llvm/llvm-project/pull/148108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #148107)
https://github.com/optimisan approved this pull request. https://github.com/llvm/llvm-project/pull/148107 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen][NPM] VirtRegRewriter: Set VirtReg flag (PR #148107)
https://github.com/optimisan edited https://github.com/llvm/llvm-project/pull/148107 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits