Author: Max Kazantsev Date: 2020-11-27T11:19:54+07:00 New Revision: faf183874cb6f434815e2b1c0b59cd452219f89e
URL: https://github.com/llvm/llvm-project/commit/faf183874cb6f434815e2b1c0b59cd452219f89e DIFF: https://github.com/llvm/llvm-project/commit/faf183874cb6f434815e2b1c0b59cd452219f89e.diff LOG: [IndVars] LCSSA Phi users should not prevent widening When widening an IndVar that has LCSSA Phi users outside the loop, we can safely widen it as usual and then truncate the result outside the loop without hurting the performance. Differential Revision: https://reviews.llvm.org/D91593 Reviewed By: skatkov Added: Modified: llvm/lib/Transforms/Utils/SimplifyIndVar.cpp llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp index 290f3671afca..d37fe74a0039 100644 --- a/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -1542,16 +1542,26 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { auto AnotherOpExtKind = ExtKind; // Check that all uses are either s/zext, or narrow def (in case of we are - // widening the IV increment). + // widening the IV increment), or single-input LCSSA Phis. SmallVector<Instruction *, 4> ExtUsers; + SmallVector<PHINode *, 4> LCSSAPhiUsers; for (Use &U : NarrowUse->uses()) { - if (U.getUser() == NarrowDef) + Instruction *User = cast<Instruction>(U.getUser()); + if (User == NarrowDef) continue; - Instruction *User = nullptr; + if (!L->contains(User)) { + auto *LCSSAPhi = cast<PHINode>(User); + // Make sure there is only 1 input, so that we don't have to split + // critical edges. + if (LCSSAPhi->getNumOperands() != 1) + return false; + LCSSAPhiUsers.push_back(LCSSAPhi); + continue; + } if (ExtKind == SignExtended) - User = dyn_cast<SExtInst>(U.getUser()); + User = dyn_cast<SExtInst>(User); else - User = dyn_cast<ZExtInst>(U.getUser()); + User = dyn_cast<ZExtInst>(User); if (!User || User->getType() != WideType) return false; ExtUsers.push_back(User); @@ -1630,6 +1640,21 @@ bool WidenIV::widenWithVariantUse(WidenIV::NarrowIVDefUse DU) { User->replaceAllUsesWith(WideBO); DeadInsts.emplace_back(User); } + + for (PHINode *User : LCSSAPhiUsers) { + assert(User->getNumOperands() == 1 && "Checked before!"); + Builder.SetInsertPoint(User); + auto *WidePN = + Builder.CreatePHI(WideBO->getType(), 1, User->getName() + ".wide"); + BasicBlock *LoopExitingBlock = User->getParent()->getSinglePredecessor(); + assert(LoopExitingBlock && L->contains(LoopExitingBlock) && + "Not a LCSSA Phi?"); + WidePN->addIncoming(WideBO, LoopExitingBlock); + Builder.SetInsertPoint(User->getParent()->getFirstNonPHI()); + auto *TruncPN = Builder.CreateTrunc(WidePN, User->getType()); + User->replaceAllUsesWith(TruncPN); + DeadInsts.emplace_back(User); + } return true; } diff --git a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll index 2bb37d23866e..fb9b198fe8af 100644 --- a/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll +++ b/llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll @@ -697,20 +697,18 @@ define i32 @test14(i32 %start, i32* %p, i32* %q) { ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK: failure: ; CHECK-NEXT: unreachable ; @@ -750,24 +748,23 @@ define i32 @test15(i32 %start, i32* %p, i32* %q) { ; CHECK: loop: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ] ; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[FOO:%.*]] = add i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]] ; CHECK: backedge: -; CHECK-NEXT: [[INDEX:%.*]] = zext i32 [[FOO]] to i64 -; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[TMP1]] ; CHECK-NEXT: store i32 1, i32* [[STORE_ADDR]], align 4 -; CHECK-NEXT: [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q]], align 4 +; CHECK-NEXT: [[STOP:%.*]] = load i32, i32* [[Q:%.*]], align 4 ; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 ; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]] ; CHECK: exit: -; CHECK-NEXT: call void @use(i32 -1) -; CHECK-NEXT: ret i32 -1 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 -1 to i32 +; CHECK-NEXT: call void @use(i32 [[TMP2]]) +; CHECK-NEXT: ret i32 [[TMP2]] ; CHECK: failure: -; CHECK-NEXT: [[FOO_LCSSA1:%.*]] = phi i32 [ [[FOO]], [[BACKEDGE]] ] -; CHECK-NEXT: call void @use(i32 [[FOO_LCSSA1]]) +; CHECK-NEXT: [[FOO_LCSSA1_WIDE:%.*]] = phi i64 [ [[TMP1]], [[BACKEDGE]] ] +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[FOO_LCSSA1_WIDE]] to i32 +; CHECK-NEXT: call void @use(i32 [[TMP3]]) ; CHECK-NEXT: unreachable ; entry: _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits