Author: Jameson Nash Date: 2026-01-21T14:14:05-05:00 New Revision: 366712217e5455a8e2a24c43832c0b4ef609a80f
URL: https://github.com/llvm/llvm-project/commit/366712217e5455a8e2a24c43832c0b4ef609a80f DIFF: https://github.com/llvm/llvm-project/commit/366712217e5455a8e2a24c43832c0b4ef609a80f.diff LOG: Revert "[MemCpyOpt] support offset slices for performStackMoveOptzn and proce…" This reverts commit 019eb855dd6a18a8f7ae5dd86abf6bc3ad0d9fa4. Added: Modified: llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp Removed: llvm/test/Transforms/MemCpyOpt/stack-move-offset.ll ################################################################################ diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index b87ea0b9d243f..496d2958fc2d0 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -16,7 +16,6 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/PassManager.h" -#include "llvm/Support/Compiler.h" namespace llvm { @@ -55,12 +54,12 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { public: MemCpyOptPass() = default; - LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); // Glue for the old PM. 
- LLVM_ABI bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, - AssumptionCache *AC, DominatorTree *DT, - PostDominatorTree *PDT, MemorySSA *MSSA); + bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, + AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, + MemorySSA *MSSA); private: // Helper functions @@ -86,8 +85,8 @@ class MemCpyOptPass : public PassInfoMixin<MemCpyOptPass> { Value *ByteVal); bool moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI); bool performStackMoveOptzn(Instruction *Load, Instruction *Store, - Value *DestPtr, Value *SrcPtr, TypeSize Size, - BatchAAResults &BAA); + AllocaInst *DestAlloca, AllocaInst *SrcAlloca, + TypeSize Size, BatchAAResults &BAA); bool isMemMoveMemSetDependency(MemMoveInst *M); void eraseInstruction(Instruction *I); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index d8d784f2ff774..5de3dfa148314 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -726,15 +726,18 @@ bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI, // If this is a load-store pair from a stack slot to a stack slot, we // might be able to perform the stack-move optimization just as we do for // memcpys from an alloca to an alloca. - if (performStackMoveOptzn(LI, SI, SI->getPointerOperand(), - LI->getPointerOperand(), DL.getTypeStoreSize(T), - BAA)) { - // Avoid invalidating the iterator. - BBI = SI->getNextNode()->getIterator(); - eraseInstruction(SI); - eraseInstruction(LI); - ++NumMemCpyInstr; - return true; + if (auto *DestAlloca = dyn_cast<AllocaInst>(SI->getPointerOperand())) { + if (auto *SrcAlloca = dyn_cast<AllocaInst>(LI->getPointerOperand())) { + if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca, + DL.getTypeStoreSize(T), BAA)) { + // Avoid invalidating the iterator. 
+ BBI = SI->getNextNode()->getIterator(); + eraseInstruction(SI); + eraseInstruction(LI); + ++NumMemCpyInstr; + return true; + } + } } return false; @@ -1494,47 +1497,20 @@ bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy, // transformation only because we restrict the scope of this optimization to // allocas that aren't captured. bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, - Value *DestPtr, Value *SrcPtr, - TypeSize Size, BatchAAResults &BAA) { + AllocaInst *DestAlloca, + AllocaInst *SrcAlloca, TypeSize Size, + BatchAAResults &BAA) { LLVM_DEBUG(dbgs() << "Stack Move: Attempting to optimize:\n" << *Store << "\n"); - AllocaInst *DestAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(DestPtr)); - if (!DestAlloca) - return false; - - AllocaInst *SrcAlloca = dyn_cast<AllocaInst>(getUnderlyingObject(SrcPtr)); - if (!SrcAlloca) - return false; - - // Explicitly don't handle degenerate case of a partial copy within one - // alloca. It would always fail the dominator check later anyways, and - // possibly the modref checks also. - if (SrcAlloca == DestAlloca) - return false; - // Make sure the two allocas are in the same address space. if (SrcAlloca->getAddressSpace() != DestAlloca->getAddressSpace()) { LLVM_DEBUG(dbgs() << "Stack Move: Address space mismatch\n"); return false; } - if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) - return false; - // Check that copy is full with static size. const DataLayout &DL = DestAlloca->getDataLayout(); - - auto DestOffset = DestPtr->getPointerOffsetFrom(DestAlloca, DL); - if (!DestOffset) - return false; - - auto SrcOffset = SrcPtr->getPointerOffsetFrom(SrcAlloca, DL); - if (!SrcOffset || *SrcOffset < *DestOffset || *SrcOffset < 0) - return false; - // Offset difference must preserve dest alloca's alignment. 
- if ((*SrcOffset - *DestOffset) % DestAlloca->getAlign().value() != 0) - return false; std::optional<TypeSize> SrcSize = SrcAlloca->getAllocationSize(DL); std::optional<TypeSize> DestSize = DestAlloca->getAllocationSize(DL); if (!SrcSize || !DestSize) @@ -1542,12 +1518,14 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, if (*SrcSize != *DestSize) if (!SrcSize->isFixed() || !DestSize->isFixed()) return false; - // Check that copy covers entirety of dest alloca. - if (Size != *DestSize || *DestOffset != 0) { + if (Size != *DestSize) { LLVM_DEBUG(dbgs() << "Stack Move: Destination alloca size mismatch\n"); return false; } + if (!SrcAlloca->isStaticAlloca() || !DestAlloca->isStaticAlloca()) + return false; + // Check if it will be legal to combine allocas without breaking dominator. bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca); if (MoveSrc) { @@ -1711,13 +1689,7 @@ bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store, } // Merge the two allocas. - Value *NewDestPtr = SrcAlloca; - if (*SrcOffset != *DestOffset) { - IRBuilder<> Builder(DestAlloca); - NewDestPtr = Builder.CreateInBoundsPtrAdd( - SrcAlloca, Builder.getInt64(*SrcOffset - *DestOffset)); - } - DestAlloca->replaceAllUsesWith(NewDestPtr); + DestAlloca->replaceAllUsesWith(SrcAlloca); eraseInstruction(DestAlloca); // Drop metadata on the source alloca. @@ -1788,7 +1760,7 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { return false; // If copying from a constant, try to turn the memcpy into a memset. 
- if (auto *GV = dyn_cast<GlobalVariable>(getUnderlyingObject(M->getSource()))) + if (auto *GV = dyn_cast<GlobalVariable>(M->getSource())) if (GV->isConstant() && GV->hasDefinitiveInitializer()) if (Value *ByteVal = isBytewiseValue(GV->getInitializer(), M->getDataLayout())) { @@ -1874,10 +1846,16 @@ bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) { // If the transfer is from a stack slot to a stack slot, then we may be able // to perform the stack-move optimization. See the comments in // performStackMoveOptzn() for more details. + auto *DestAlloca = dyn_cast<AllocaInst>(M->getDest()); + if (!DestAlloca) + return false; + auto *SrcAlloca = dyn_cast<AllocaInst>(M->getSource()); + if (!SrcAlloca) + return false; ConstantInt *Len = dyn_cast<ConstantInt>(M->getLength()); if (Len == nullptr) return false; - if (performStackMoveOptzn(M, M, M->getDest(), M->getSource(), + if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca, TypeSize::getFixed(Len->getZExtValue()), BAA)) { // Avoid invalidating the iterator. BBI = M->getNextNode()->getIterator(); diff --git a/llvm/test/Transforms/MemCpyOpt/stack-move-offset.ll b/llvm/test/Transforms/MemCpyOpt/stack-move-offset.ll deleted file mode 100644 index fdf5e838b20e5..0000000000000 --- a/llvm/test/Transforms/MemCpyOpt/stack-move-offset.ll +++ /dev/null @@ -1,238 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2 -; RUN: opt < %s -passes=memcpyopt -verify-memoryssa -S | FileCheck %s - -; Test that stack-move optimization works when src is a GEP into an alloca. 
-; For the optimization to trigger: -; - The copy must cover the entire dest alloca (Size == DestSize, DestOffset == 0) -; - SrcOffset must be a multiple of DestAlloca's alignment -; - SrcOffset must be non-negative - -declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) -declare void @llvm.lifetime.start.p0(ptr nocapture) -declare void @llvm.lifetime.end.p0(ptr nocapture) - -declare void @use_nocapture(ptr nocapture) - -; Basic test: memcpy from GEP(src) to dest alloca -; src = [16 x i8], dest = [8 x i8] align 8, copy 8 bytes -; SrcOffset(8) is a multiple of DestAlign(8), so optimization applies. -; After optimization: dest uses become src+8 -define void @memcpy_src_gep_to_dest_alloca() { -; CHECK-LABEL: define void @memcpy_src_gep_to_dest_alloca() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 8 -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 -; CHECK-NEXT: [[SRC_GEP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 -; CHECK-NEXT: store i64 42, ptr [[SRC_GEP1]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP1]]) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: ret void -; - %src = alloca [16 x i8], align 4 - %dest = alloca [8 x i8], align 8 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 8 - store i64 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src.gep, i64 8, i1 false) - - call void @use_nocapture(ptr nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Test: memcpy from GEP(src) to dest alloca with different offset -; src = [12 x i8], dest = [8 x i8] align 4, copy 8 bytes from src+4 -; SrcOffset(4) is a multiple of 
DestAlign(4), so optimization applies. -; After optimization: dest uses become src+4 -define void @memcpy_src_gep_offset4_to_dest_alloca() { -; CHECK-LABEL: define void @memcpy_src_gep_offset4_to_dest_alloca() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [12 x i8], align 4 -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: [[SRC_GEP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: store i64 42, ptr [[SRC_GEP1]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP1]]) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: ret void -; - %src = alloca [12 x i8], align 4 - %dest = alloca [8 x i8], align 4 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 4 - store i64 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src.gep, i64 8, i1 false) - - call void @use_nocapture(ptr nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Test: load/store from GEP(src) to dest alloca -; src = [8 x i8], dest = [4 x i8] align 4, load/store 4 bytes from src+4 -; SrcOffset(4) is a multiple of DestAlign(4), so optimization applies. 
-define void @load_store_src_gep_to_dest_alloca() { -; CHECK-LABEL: define void @load_store_src_gep_to_dest_alloca() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 4 -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: [[SRC_GEP1:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: store i32 42, ptr [[SRC_GEP1]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP1]]) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: ret void -; - %src = alloca [8 x i8], align 4 - %dest = alloca [4 x i8], align 4 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 4 - store i32 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - %val = load i32, ptr %src.gep - store i32 %val, ptr %dest - - call void @use_nocapture(ptr nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Test: both src and dest are direct allocas (no offset), same size -; This is the basic stack-move case, included here for completeness. 
-define void @memcpy_both_direct_allocas() { -; CHECK-LABEL: define void @memcpy_both_direct_allocas() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i8], align 8 -; CHECK-NEXT: store i64 42, ptr [[SRC]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC]]) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC]]) -; CHECK-NEXT: ret void -; - %src = alloca [8 x i8], align 4 - %dest = alloca [8 x i8], align 8 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - store i64 42, ptr %src - call void @use_nocapture(ptr nocapture %src) - - call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 4 %src, i64 8, i1 false) - - call void @use_nocapture(ptr nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Negative test: dest has offset (dest is GEP, not direct alloca) -; The optimization requires DestOffset == 0. -define void @no_optimize_dest_has_offset() { -; CHECK-LABEL: define void @no_optimize_dest_has_offset() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [16 x i8], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[DEST]]) -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 -; CHECK-NEXT: [[DEST_GEP:%.*]] = getelementptr inbounds i8, ptr [[DEST]], i64 8 -; CHECK-NEXT: store i64 42, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST_GEP]], ptr align 4 [[SRC_GEP]], i64 8, i1 false) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[DEST_GEP]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[DEST]]) -; CHECK-NEXT: ret void -; - %src = alloca [16 x i8], align 4 - %dest = alloca [16 x i8], 
align 8 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 8 - %dest.gep = getelementptr inbounds i8, ptr %dest, i64 8 - store i64 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest.gep, ptr align 4 %src.gep, i64 8, i1 false) - - call void @use_nocapture(ptr nocapture %dest.gep) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Negative test: copy doesn't cover entire dest alloca (Size != DestSize) -; src = [12 x i8], dest = [16 x i8], copy only 8 bytes -define void @no_optimize_partial_dest_copy() { -; CHECK-LABEL: define void @no_optimize_partial_dest_copy() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [12 x i8], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [16 x i8], align 4 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[DEST]]) -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: store i64 42, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[DEST]], ptr align 4 [[SRC_GEP]], i64 8, i1 false) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[DEST]]) -; CHECK-NEXT: ret void -; - %src = alloca [12 x i8], align 4 - %dest = alloca [16 x i8], align 4 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 4 - store i64 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - call void @llvm.memcpy.p0.p0.i64(ptr align 4 %dest, ptr align 4 %src.gep, i64 8, i1 false) - - call void @use_nocapture(ptr 
nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} - -; Negative test: SrcOffset not a multiple of DestAlign -; src = [12 x i8] with offset 4, dest = [8 x i8] align 8 -; SrcOffset(4) % DestAlign(8) = 4 != 0 -> rejected -define void @no_optimize_alignment_mismatch() { -; CHECK-LABEL: define void @no_optimize_alignment_mismatch() { -; CHECK-NEXT: [[SRC:%.*]] = alloca [12 x i8], align 4 -; CHECK-NEXT: [[DEST:%.*]] = alloca [8 x i8], align 8 -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[DEST]]) -; CHECK-NEXT: [[SRC_GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: store i64 42, ptr [[SRC_GEP]], align 4 -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[SRC_GEP]]) -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[DEST]], ptr align 4 [[SRC_GEP]], i64 8, i1 false) -; CHECK-NEXT: call void @use_nocapture(ptr captures(none) [[DEST]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[SRC]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[DEST]]) -; CHECK-NEXT: ret void -; - %src = alloca [12 x i8], align 4 - %dest = alloca [8 x i8], align 8 - call void @llvm.lifetime.start.p0(ptr %src) - call void @llvm.lifetime.start.p0(ptr %dest) - %src.gep = getelementptr inbounds i8, ptr %src, i64 4 - store i64 42, ptr %src.gep - call void @use_nocapture(ptr nocapture %src.gep) - - call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dest, ptr align 4 %src.gep, i64 8, i1 false) - - call void @use_nocapture(ptr nocapture %dest) - - call void @llvm.lifetime.end.p0(ptr %src) - call void @llvm.lifetime.end.p0(ptr %dest) - ret void -} _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
