control: tag -1 +confirmed +patch We have successfully built Julia with the patched llvm-6.0 . Please merge the two patches and update the llvm package.
* llvm-D49832-SCEVPred.patch * llvm-rL323946-LSRTy.patch They come from Julia's commit df451468a14e0b0f7985f8396a6c15ef5a411422
commit 98592fcc61307968f7df1362771534595a1e1c21 Author: Keno Fischer <k...@juliacomputing.com> Date: Wed Jul 25 19:29:02 2018 -0400 [SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces Summary: In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint intrinsics. Instead, we need to expand any pointer arithmetic as geps on the base pointer. Luckily this is a common task for SCEV, so all we have to do here is hook up the corresponding helper function and add test case. Fixes PR38290 Reviewers: reames, sanjoy Subscribers: javed.absar, llvm-commits Differential Revision: https://reviews.llvm.org/D49832 diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 7f76f057216..f441a3647fb 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, const SCEV *Step = AR->getStepRecurrence(SE); const SCEV *Start = AR->getStart(); + Type *ARTy = AR->getType(); unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); - unsigned DstBits = SE.getTypeSizeInBits(AR->getType()); + unsigned DstBits = SE.getTypeSizeInBits(ARTy); // The expression {Start,+,Step} has nusw/nssw if // Step < 0, Start - |Step| * Backedge <= Start @@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc); IntegerType *Ty = - IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType())); + IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); + Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty; Value *StepValue = expandCodeFor(Step, Ty, Loc); Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc); - Value *StartValue = expandCodeFor(Start, Ty, Loc); + Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc); ConstantInt *Zero = ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); @@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, // Compute: // Start + |Step| * Backedge < Start // Start - |Step| * Backedge > Start - Value *Add = Builder.CreateAdd(StartValue, MulV); - Value *Sub = Builder.CreateSub(StartValue, MulV); + Value *Add = nullptr, *Sub = nullptr; + if (ARExpandTy->isPointerTy()) { + PointerType *ARPtrTy = cast<PointerType>(ARExpandTy); + const SCEV *MulS = SE.getSCEV(MulV); + const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)}; + Add = Builder.CreateBitCast( + expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue), + ARPtrTy); + Sub = Builder.CreateBitCast( + expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue), + ARPtrTy); + } else { + Add = Builder.CreateAdd(StartValue, MulV); + Sub = Builder.CreateSub(StartValue, MulV); + } Value *EndCompareGT = Builder.CreateICmp( Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll new file mode 100644 index 00000000000..ddcf5e1a195 --- /dev/null +++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll @@ -0,0 +1,73 @@ +; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV + +; NB: addrspaces 10-13 are non-integral +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" + +; This matches the test case from PR38290 +; Check that we expand the SCEV predicate check using GEP, rather +; than ptrtoint. + +%jl_value_t = type opaque +%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } + +declare i64 @julia_steprange_last_4949() + +define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 { +; LV-LAVEL: L26.lver.check +; LV: [[OFMul:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]]) +; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0 +; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1 +; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]] +; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]] +; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]] +; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]] +; LV-NOT: inttoptr +; LV-NOT: ptrtoint +top: + %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3 + %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4 + %3 = sub i32 0, %2 + %4 = call i64 @julia_steprange_last_4949() + %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)* + %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)* + %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2 + %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)* + %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)* + %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1 + %11 = sext i32 %3 to i64 + br label %L26 + +L26: ; preds = %L26, %top + %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ] + %12 = add i64 %value_phi3, -1 + %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12 + %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13 + %15 = add i64 %12, %11 + %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15 + store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13 + %17 = icmp eq i64 %value_phi3, %4 + br i1 %17, label %L45, label %L26 + +L45: ; preds = %L26 + ret void +} + +attributes #0 = { "thunk" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"Debug Info Version", i32 3} +!1 = !{} +!2 = !{i64 16} +!3 = !{i64 8} +!4 = !{!5, !5, i64 0} +!5 = !{!"jtbaa_mutab", !6, i64 0} +!6 = !{!"jtbaa_value", !7, i64 0} +!7 = !{!"jtbaa_data", !8, i64 0} +!8 = !{!"jtbaa"} +!9 = !{i64 40} +!10 = !{!11, !11, i64 0} +!11 = !{!"jtbaa_arrayptr", !12, i64 0} +!12 = !{!"jtbaa_array", !8, i64 0} +!13 = !{!14, !14, i64 0} +!14 = !{!"jtbaa_arraybuf", !7, i64 0} diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll index a7e5bce7445..fa6fccecbf1 100644 --- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll +++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll @@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 -; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]] -; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]] -; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2 -; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2 +; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]] +; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]] +; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]] +; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]] ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]] @@ -233,10 +233,10 @@ for.end: ; preds = %for.body ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]]) ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1 -; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]] -; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]] -; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2 -; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2 +; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]] +; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]] +; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]] +; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]] ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]] ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df Author: Mikael Holmen <mikael.hol...@ericsson.com> Date: Thu Feb 1 06:38:34 2018 +0000 [LSR] Don't force bases of foldable formulae to the final type. Summary: Before emitting code for scaled registers, we prevent SCEVExpander from hoisting any scaled addressing mode by emitting all the bases first. However, these bases are being forced to the final type, resulting in some odd code. For example, if the type of the base is an integer and the final type is a pointer, we will emit an inttoptr for the base, a ptrtoint for the scale, and then a 'reverse' GEP where the GEP pointer is actually the base integer and the index is the pointer. It's more intuitive to use the pointer as a pointer and the integer as index. Patch by: Bevin Hansson Reviewers: atrick, qcolombet, sanjoy Reviewed By: qcolombet Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D42103 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8 diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 332c074a1df..4b8e2286ed9 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, // Unless the addressing mode will not be folded. if (!Ops.empty() && LU.Kind == LSRUse::Address && isAMCompletelyFolded(TTI, LU, F)) { - Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); + Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr); Ops.clear(); Ops.push_back(SE.getUnknown(FullV)); }