control: tags -1 +confirmed +patch

We have successfully built Julia with the patched llvm-6.0.
Please merge the two patches and update the llvm package.

 * llvm-D49832-SCEVPred.patch
 * llvm-rL323946-LSRTy.patch

They come from Julia's commit df451468a14e0b0f7985f8396a6c15ef5a411422; the corresponding upstream LLVM commits are reproduced below.

commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <k...@juliacomputing.com>
Date:   Wed Jul 25 19:29:02 2018 -0400

    [SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
    
    Summary:
    In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
    intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
    base pointer. Luckily this is a common task for SCEV, so all we have to do here
    is hook up the corresponding helper function and add a test case.
    
    Fixes PR38290
    
    Reviewers: reames, sanjoy
    
    Subscribers: javed.absar, llvm-commits
    
    Differential Revision: https://reviews.llvm.org/D49832
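
As a rough illustration of what "expand pointer arithmetic as geps" means, here is a hypothetical before/after (invented value names, not taken from the patch; addrspace(13) is declared non-integral in the test's datalayout). Previously the overflow check could round-trip the pointer through an integer:

    %start = ptrtoint i32 addrspace(13)* %base to i64 ; loses provenance
    %add   = add i64 %start, %mul                     ; Start + |Step| * Backedge
    %end   = inttoptr i64 %add to i32 addrspace(13)*  ; illegal to introduce in ni addrspaces

With the patch, the bound is instead expanded as a GEP off the original base pointer (loosely, the byte offset becomes an element index), so the comparison stays on pointers:

    %end = getelementptr i32, i32 addrspace(13)* %base, i64 %backedge
    %cmp = icmp ult i32 addrspace(13)* %end, %base

This is the shape the new test below checks for, together with LV-NOT lines rejecting any inttoptr/ptrtoint.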

diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   const SCEV *Step = AR->getStepRecurrence(SE);
   const SCEV *Start = AR->getStart();
 
+  Type *ARTy = AR->getType();
   unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
-  unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+  unsigned DstBits = SE.getTypeSizeInBits(ARTy);
 
   // The expression {Start,+,Step} has nusw/nssw if
   //   Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
 
   IntegerType *Ty =
-      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
 
   Value *StepValue = expandCodeFor(Step, Ty, Loc);
   Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
-  Value *StartValue = expandCodeFor(Start, Ty, Loc);
+  Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
 
   ConstantInt *Zero =
       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   // Compute:
   //   Start + |Step| * Backedge < Start
   //   Start - |Step| * Backedge > Start
-  Value *Add = Builder.CreateAdd(StartValue, MulV);
-  Value *Sub = Builder.CreateSub(StartValue, MulV);
+  Value *Add = nullptr, *Sub = nullptr;
+  if (ARExpandTy->isPointerTy()) {
+    PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+    const SCEV *MulS = SE.getSCEV(MulV);
+    const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+    Add = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+    Sub = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+  } else {
+    Add = Builder.CreateAdd(StartValue, MulV);
+    Sub = Builder.CreateSub(StartValue, MulV);
+  }
 
   Value *EndCompareGT = Builder.CreateICmp(
       Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LABEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]]  = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+  %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+  %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+  %3 = sub i32 0, %2
+  %4 = call i64 @julia_steprange_last_4949()
+  %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+  %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+  %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+  %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+  %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+  %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+  %11 = sext i32 %3 to i64
+  br label %L26
+
+L26:                                              ; preds = %L26, %top
+  %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+  %12 = add i64 %value_phi3, -1
+  %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+  %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+  %15 = add i64 %12, %11
+  %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+  store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+  %17 = icmp eq i64 %value_phi3, %4
+  br i1 %17, label %L45, label %L26
+
+L45:                                              ; preds = %L26
+  ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 
@@ -233,10 +233,10 @@ for.end:                                          ; preds = %for.body
 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 
commit ab60b05a472e8651cbe53c19513b7e62b9ff32df
Author: Mikael Holmen <mikael.hol...@ericsson.com>
Date:   Thu Feb 1 06:38:34 2018 +0000

    [LSR] Don't force bases of foldable formulae to the final type.
    
    Summary:
    Before emitting code for scaled registers, we prevent
    SCEVExpander from hoisting any scaled addressing mode
    by emitting all the bases first. However, these bases
    are being forced to the final type, resulting in some
    odd code.
    
    For example, if the type of the base is an integer and
    the final type is a pointer, we will emit an inttoptr
    for the base, a ptrtoint for the scale, and then a
    'reverse' GEP where the GEP pointer is actually the base
    integer and the index is the pointer. It's more intuitive
    to use the pointer as a pointer and the integer as index.
    
    Patch by: Bevin Hansson
    
    Reviewers: atrick, qcolombet, sanjoy
    
    Reviewed By: qcolombet
    
    Subscribers: llvm-commits
    
    Differential Revision: https://reviews.llvm.org/D42103
    
    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@323946 91177308-0d34-0410-b5e6-96231b3b80d8
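
A minimal sketch of the odd code, with invented names (hypothetical IR, not taken from the patch): forcing an integer base %b to a pointer final type makes the expander emit something like

    %b.ptr = inttoptr i64 %b to i8*                    ; base forced to the final type
    %p.int = ptrtoint i8* %p to i64                    ; the scaled pointer register
    %addr  = getelementptr i8, i8* %b.ptr, i64 %p.int  ; 'reverse' GEP

Passing nullptr as the type (the one-line hunk below) leaves the base unforced, so the expander can keep the pointer as the GEP base and the integer as the index:

    %addr = getelementptr i8, i8* %p, i64 %b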

diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 332c074a1df..4b8e2286ed9 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4993,7 +4993,7 @@ Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
       // Unless the addressing mode will not be folded.
       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
           isAMCompletelyFolded(TTI, LU, F)) {
-        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
         Ops.clear();
         Ops.push_back(SE.getUnknown(FullV));
       }
