Author: Florian Hahn
Date: 2021-06-11T14:06:38+01:00
New Revision: 95227e4faaa5640cc4c0601124f596ce027effce
URL: https://github.com/llvm/llvm-project/commit/95227e4faaa5640cc4c0601124f596ce027effce
DIFF: https://github.com/llvm/llvm-project/commit/95227e4faaa5640cc4c0601124f596ce027effce.diff

LOG: Wrap

Added:

Modified:
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
    llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll

Removed:

################################################################################
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 7484ce67c2f2..f6c727f743fd 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -10051,7 +10051,32 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
     return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
   };
 
+  auto MatchBinaryAddToConst2 = [this](const SCEV *X, const SCEV *Y,
+                                       APInt &OutC1, APInt &OutC2,
+                                       SCEV::NoWrapFlags ExpectedFlags) {
+    const SCEV *XNonConstOp, *XConstOp;
+    const SCEV *YNonConstOp, *YConstOp;
+    SCEV::NoWrapFlags XFlagsPresent;
+    SCEV::NoWrapFlags YFlagsPresent;
+
+    if (!splitBinaryAdd(X, XConstOp, XNonConstOp, XFlagsPresent) ||
+        !isa<SCEVConstant>(XConstOp))
+      return false;
+
+    if (!splitBinaryAdd(Y, YConstOp, YNonConstOp, YFlagsPresent) ||
+        !isa<SCEVConstant>(YConstOp))
+      return false;
+
+    if (XFlagsPresent != YFlagsPresent || YNonConstOp != XNonConstOp)
+      return false;
+
+    OutC1 = cast<SCEVConstant>(XConstOp)->getAPInt();
+    OutC2 = cast<SCEVConstant>(YConstOp)->getAPInt();
+    return (XFlagsPresent & ExpectedFlags) == ExpectedFlags;
+  };
+
   APInt C;
+  APInt C2;
 
   switch (Pred) {
   default:
@@ -10069,6 +10094,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
     if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
         !C.isStrictlyPositive())
       return true;
+
+    if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.sle(C2))
+      return true;
+
     break;
 
   case ICmpInst::ICMP_SGT:
@@ -10083,6 +10112,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
     // (X + C)<nsw> s< X if C < 0
     if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
       return true;
+
+    if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.slt(C2))
+      return true;
+
     break;
 
   case ICmpInst::ICMP_UGE:
@@ -10092,6 +10125,10 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
     // X u<= (X + C)<nuw> for any C
     if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW))
       return true;
+
+    if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.ule(C2))
+      return true;
+
     break;
 
   case ICmpInst::ICMP_UGT:
@@ -10101,6 +10138,9 @@ bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
     // X u< (X + C)<nuw> if C != 0
     if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNUW) && !C.isNullValue())
       return true;
+
+    if (MatchBinaryAddToConst2(LHS, RHS, C, C2, SCEV::FlagNUW) && C.ult(C2))
+      return true;
     break;
   }
 
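For context, the new MatchBinaryAddToConst2 matches LHS = (X + C1) and RHS = (X + C2) with identical no-wrap flags on the same base X, so the predicate reduces to a comparison of the two constants. A minimal standalone sketch of the unsigned case (plain C++ with a hypothetical helper name, not the ScalarEvolution or APInt API):

  // Sketch: why (X + C1)<nuw> u<= (X + C2)<nuw> reduces to C1 u<= C2.
  #include <cassert>
  #include <cstdint>

  // Precondition: X + C1 and X + C2 are both known not to wrap (nuw).
  bool knownULE(uint64_t X, uint64_t C1, uint64_t C2) {
    assert(X + C1 >= X && X + C2 >= X && "both adds must be nuw");
    // With no unsigned wrap, adding to the same base preserves unsigned
    // order, so the comparison is independent of X.
    return C1 <= C2;
  }

  int main() {
    assert(knownULE(100, 2, 3));   // 102 u<= 103
    assert(!knownULE(100, 3, 2));  // 103 is not u<= 102
    return 0;
  }

The ule/ult cases in the patch are exactly this comparison on APInt; the sle/slt cases compare the constants with signed predicates instead.
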
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
index bdc934ae11af..c2cbb96d4236 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/memory-runtime-checks.ll
@@ -53,7 +53,7 @@ define void @needs_versioning_profitable(i32* %dst, i32* %src) {
 ; CHECK-NEXT: entry.slpmemcheck:
 ; CHECK-NEXT: [[DST16:%.*]] = bitcast i32* [[DST:%.*]] to i8*
 ; CHECK-NEXT: [[SRC18:%.*]] = bitcast i32* [[SRC:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[DST]], i64 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[DST]], i64 3
 ; CHECK-NEXT: [[SCEVGEP17:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
 ; CHECK-NEXT: [[SCEVGEP19:%.*]] = getelementptr i32, i32* [[SRC]], i64 3
 ; CHECK-NEXT: [[SCEVGEP1920:%.*]] = bitcast i32* [[SCEVGEP19]] to i8*
@@ -62,23 +62,17 @@ define void @needs_versioning_profitable(i32* %dst, i32* %src) {
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
 ; CHECK: entry.slpversioned:
-; CHECK-NEXT: [[SRC_0:%.*]] = load i32, i32* [[SRC]], align 4, !alias.scope !5, !noalias !8
-; CHECK-NEXT: [[R_0:%.*]] = ashr i32 [[SRC_0]], 16
-; CHECK-NEXT: store i32 [[R_0]], i32* [[DST]], align 4, !alias.scope !8, !noalias !5
 ; CHECK-NEXT: [[SRC_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 1
-; CHECK-NEXT: [[SRC_1:%.*]] = load i32, i32* [[SRC_GEP_1]], align 4
-; CHECK-NEXT: [[R_1:%.*]] = ashr i32 [[SRC_1]], 16
 ; CHECK-NEXT: [[DST_GEP_1:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 1
-; CHECK-NEXT: store i32 [[R_1]], i32* [[DST_GEP_1]], align 4
 ; CHECK-NEXT: [[SRC_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 2
 ; CHECK-NEXT: [[DST_GEP_2:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 2
 ; CHECK-NEXT: [[SRC_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC_GEP_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, <2 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = ashr <2 x i32> [[TMP1]], <i32 16, i32 16>
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[SRC]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !5, !noalias !8
+; CHECK-NEXT: [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 16, i32 16, i32 16, i32 16>
 ; CHECK-NEXT: [[DST_GEP_3:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 3
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST_GEP_2]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP2]], <2 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[DST]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !5
 ; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
 ; CHECK: entry.merge:
 ; CHECK-NEXT: ret void
@@ -156,7 +150,7 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
 ; CHECK-NEXT: entry.slpmemcheck:
 ; CHECK-NEXT: [[OUT_BLOCK12:%.*]] = bitcast i32* [[OUT_BLOCK:%.*]] to i8*
 ; CHECK-NEXT: [[COUNTER14:%.*]] = bitcast i32* [[COUNTER:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 3
 ; CHECK-NEXT: [[SCEVGEP13:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
 ; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 3
 ; CHECK-NEXT: [[SCEVGEP1516:%.*]] = bitcast i32* [[SCEVGEP15]] to i8*
@@ -165,52 +159,44 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
 ; CHECK: entry.slpversioned:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[COUNTER]], align 4, !alias.scope !10, !noalias !13
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4, !alias.scope !13, !noalias !10
-; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[TMP0]]
-; CHECK-NEXT: store i32 [[XOR]], i32* [[OUT_BLOCK]], align 4, !alias.scope !13, !noalias !10
 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4
-; CHECK-NEXT: [[XOR_1:%.*]] = xor i32 [[TMP3]], [[TMP2]]
-; CHECK-NEXT: store i32 [[XOR_1]], i32* [[ARRAYIDX2_1]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
 ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[ARRAYIDX_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[COUNTER]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !10, !noalias !13
 ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[ARRAYIDX2_2]] to <2 x i32>*
-; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = xor <2 x i32> [[TMP7]], [[TMP5]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX2_2]] to <2 x i32>*
-; CHECK-NEXT: store <2 x i32> [[TMP8]], <2 x i32>* [[TMP9]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !13, !noalias !10
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !alias.scope !13, !noalias !10
 ; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
 ; CHECK: entry.merge:
 ; CHECK-NEXT: ret void
 ; CHECK: entry.scalar:
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[COUNTER]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
-; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP11]], [[TMP10]]
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[COUNTER]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT: store i32 [[XOR2]], i32* [[OUT_BLOCK]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_14:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
-; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP13]], [[TMP12]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
+; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: store i32 [[XOR_15]], i32* [[ARRAYIDX2_14]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
-; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_27:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
-; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP15]], [[TMP14]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
+; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: store i32 [[XOR_28]], i32* [[ARRAYIDX2_27]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_310:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP17]], [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
+; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT: store i32 [[XOR_311]], i32* [[ARRAYIDX2_310]], align 4
 ; CHECK-NEXT: br label [[ENTRY_MERGE]]
 ;
@@ -387,10 +373,12 @@ define void @slp_not_beneficial(i32* %A, i32* %B) {
 ; CHECK-NEXT: bb.slpmemcheck:
 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4
 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 4
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i32, i32* [[A]], i64 5
 ; CHECK-NEXT: [[SCEVGEP78:%.*]] = bitcast i32* [[SCEVGEP7]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP6]], [[SCEVGEP78]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP78]], [[SCEVGEP6]]
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 4
+; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast i32* [[SCEVGEP9]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP6]], [[SCEVGEP910]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP910]], [[SCEVGEP78]]
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 4
 ; CHECK-NEXT: store i32 0, i32* [[TMP2]], align 8
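The updated SCEVGEP/BOUND lines in these tests all follow the same interval-overlap scheme: each pointer group gets a [low, high) byte range, and two unsigned comparisons detect a possible conflict. A small sketch of that check (plain C++; struct and function names are made up, not the LoopAccessAnalysis API):

  #include <cassert>
  #include <cstdint>

  // One runtime-check group: [Start, End) in bytes.
  struct Bounds {
    uint64_t Start;
    uint64_t End; // one past the last byte accessed
  };

  // Mirrors the emitted IR: BOUND0 = A.Start u< B.End,
  // BOUND1 = B.Start u< A.End, FOUND_CONFLICT = BOUND0 & BOUND1.
  bool mayConflict(const Bounds &A, const Bounds &B) {
    bool Bound0 = A.Start < B.End;
    bool Bound1 = B.Start < A.End;
    return Bound0 && Bound1;
  }

  int main() {
    Bounds Dst{0x1000, 0x1010}, Src{0x1010, 0x1020};
    assert(!mayConflict(Dst, Src)); // back-to-back ranges do not overlap
    Bounds Ovl{0x100c, 0x101c};
    assert(mayConflict(Dst, Ovl)); // overlapping ranges conflict
    return 0;
  }
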
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
index 4db147a53076..8e1bb67d33e8 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/memory-runtime-checks.ll
@@ -6,36 +6,55 @@ define void @version_multiple(i32* nocapture %out_block, i32* nocapture readonly
 ; CHECK-NEXT: entry.slpmemcheck:
 ; CHECK-NEXT: [[OUT_BLOCK12:%.*]] = bitcast i32* [[OUT_BLOCK:%.*]] to i8*
 ; CHECK-NEXT: [[COUNTER14:%.*]] = bitcast i32* [[COUNTER:%.*]] to i8*
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[OUT_BLOCK]], i64 3
 ; CHECK-NEXT: [[SCEVGEP13:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT: [[SCEVGEP15:%.*]] = getelementptr i32, i32* [[COUNTER]], i64 3
 ; CHECK-NEXT: [[SCEVGEP1516:%.*]] = bitcast i32* [[SCEVGEP15]] to i8*
 ; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[OUT_BLOCK12]], [[SCEVGEP1516]]
 ; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[COUNTER14]], [[SCEVGEP13]]
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[COUNTER]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
-; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP1]], [[TMP0]]
+; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[ENTRY_SCALAR:%.*]], label [[ENTRY_SLPVERSIONED:%.*]]
+; CHECK: entry.slpversioned:
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
+; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
+; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[COUNTER]] to <4 x i32>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !alias.scope !0, !noalias !3
+; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP2]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: [[TMP4:%.*]] = xor <4 x i32> [[TMP3]], [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[OUT_BLOCK]] to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 4, !alias.scope !3, !noalias !0
+; CHECK-NEXT: br label [[ENTRY_MERGE:%.*]]
+; CHECK: entry.merge:
+; CHECK-NEXT: ret void
+; CHECK: entry.scalar:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[COUNTER]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[OUT_BLOCK]], align 4
+; CHECK-NEXT: [[XOR2:%.*]] = xor i32 [[TMP7]], [[TMP6]]
 ; CHECK-NEXT: store i32 [[XOR2]], i32* [[OUT_BLOCK]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_13:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 1
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_13]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_14:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 1
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
-; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP3]], [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_14]], align 4
+; CHECK-NEXT: [[XOR_15:%.*]] = xor i32 [[TMP9]], [[TMP8]]
 ; CHECK-NEXT: store i32 [[XOR_15]], i32* [[ARRAYIDX2_14]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_26:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 2
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_26]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_27:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 2
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
-; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_27]], align 4
+; CHECK-NEXT: [[XOR_28:%.*]] = xor i32 [[TMP11]], [[TMP10]]
 ; CHECK-NEXT: store i32 [[XOR_28]], i32* [[ARRAYIDX2_27]], align 4
 ; CHECK-NEXT: [[ARRAYIDX_39:%.*]] = getelementptr inbounds i32, i32* [[COUNTER]], i64 3
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_39]], align 4
 ; CHECK-NEXT: [[ARRAYIDX2_310:%.*]] = getelementptr inbounds i32, i32* [[OUT_BLOCK]], i64 3
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_310]], align 4
+; CHECK-NEXT: [[XOR_311:%.*]] = xor i32 [[TMP13]], [[TMP12]]
 ; CHECK-NEXT: store i32 [[XOR_311]], i32* [[ARRAYIDX2_310]], align 4
-; CHECK-NEXT: ret void
+; CHECK-NEXT: br label [[ENTRY_MERGE]]
 ;
 entry:
   %0 = load i32, i32* %counter, align 4
@@ -79,12 +98,14 @@ define void @delete_pointer_bound(float* %a, float* %b, i1 %c) #0 {
 ; CHECK-NEXT: call void @use(<8 x float> [[I71]])
 ; CHECK-NEXT: ret void
 ; CHECK: then.slpmemcheck:
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 8
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 5
 ; CHECK-NEXT: [[SCEVGEP8:%.*]] = bitcast float* [[SCEVGEP]] to i8*
-; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr float, float* [[B]], i64 14
+; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr float, float* [[A]], i64 8
 ; CHECK-NEXT: [[SCEVGEP910:%.*]] = bitcast float* [[SCEVGEP9]] to i8*
-; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP8]], [[SCEVGEP910]]
-; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP910]], [[SCEVGEP8]]
+; CHECK-NEXT: [[SCEVGEP11:%.*]] = getelementptr float, float* [[B]], i64 14
+; CHECK-NEXT: [[SCEVGEP1112:%.*]] = bitcast float* [[SCEVGEP11]] to i8*
+; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP8]], [[SCEVGEP1112]]
+; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP1112]], [[SCEVGEP910]]
 ; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT: [[A_83:%.*]] = getelementptr inbounds float, float* [[A]], i64 8
 ; CHECK-NEXT: store float 0.000000e+00, float* [[A_83]], align 4

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
