llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-transforms Author: Ryotaro Kasuga (kasuga-fj) <details> <summary>Changes</summary> This patch adds a check in validation for delinearization to ensure that the offset calculation does not overflow. If it overflows, different subscripts could map to the same linear index, leading to incorrect behavior. For fixed-size arrays, the check is relatively straightforward. However, for dynamic-size arrays (i.e., arrays where the size is not known at compile time), it's difficult to prove this statically, and it fails for almost all cases. Maybe we need to add some runtime checks or reasoning based on `inbounds` like LAA does. Fixes the test cases added in #<!-- -->169048. --- Full diff: https://github.com/llvm/llvm-project/pull/169902.diff 10 Files Affected: - (modified) llvm/lib/Analysis/Delinearization.cpp (+51) - (modified) llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll (+1-1) - (modified) llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll (+2-2) - (modified) llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll (+1-1) - (modified) llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll (+2-2) - (modified) llvm/test/Analysis/Delinearization/validation_large_size.ll (+5-8) - (modified) llvm/test/Analysis/DependenceAnalysis/DADelin.ll (+16-16) - (modified) llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll (+1-1) - (modified) llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll (+7-3) - (modified) llvm/test/Transforms/LICM/lnicm.ll (+3) ``````````diff diff --git a/llvm/lib/Analysis/Delinearization.cpp b/llvm/lib/Analysis/Delinearization.cpp index 0c3b02ae09f47..d847de0edca12 100644 --- a/llvm/lib/Analysis/Delinearization.cpp +++ b/llvm/lib/Analysis/Delinearization.cpp @@ -747,6 +747,20 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE, ArrayRef<const SCEV *> Sizes, ArrayRef<const SCEV *> Subscripts, const Value *Ptr) { + // Sizes and Subscripts are as follows: + // + // Sizes: [UNK][S_2]...[S_n] + // Subscripts: [I_1][I_2]...[I_n] + // + // where the size of the outermost dimension is unknown (UNK). + + auto MulOverflow = [&](const SCEV *A, const SCEV *B) -> const SCEV * { + if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/true, A, B)) + return nullptr; + return SE.getMulExpr(A, B); + }; + + // Range check: 0 <= I_k < S_k for k = 2..n. for (size_t I = 1; I < Sizes.size(); ++I) { const SCEV *Size = Sizes[I - 1]; const SCEV *Subscript = Subscripts[I]; @@ -755,6 +769,43 @@ bool llvm::validateDelinearizationResult(ScalarEvolution &SE, if (!isKnownLessThan(&SE, Subscript, Size)) return false; } + + // The offset computation is as follows: + // + // Offset = I_n + + // S_n * I_{n-1} + + // ... + + // (S_2 * ... * S_n) * I_1 + // + // Regarding this as a function from (I_1, I_2, ..., I_n) to integers, it + // must be injective. To guarantee it, the above calculation must not + // overflow. Since we have already checked that 0 <= I_k < S_k for k = 2..n, + // the minimum and maximum values occur in the following cases: + // + // Min = [I_1][0]...[0] = S_2 * ... * S_n * I_1 + // Max = [I_1][S_2-1]...[S_n-1] + // = (S_2 * ... * S_n) * I_1 + + // (S_2 * ... * S_{n-1}) * (S_2 - 1) + + // ... + + // (S_n - 1) + // = (S_2 * ... * S_n) * I_1 + + // (S_2 * ... * S_n) - 1 (can be proved by induction) + // + const SCEV *Prod = SE.getOne(Sizes[0]->getType()); + for (const SCEV *Size : drop_end(Sizes)) { + Prod = MulOverflow(Prod, Size); + if (!Prod) + return false; + } + const SCEV *Min = MulOverflow(Prod, Subscripts[0]); + if (!Min) + return false; + + // Over-approximate Max as Prod * I_1 + Prod (ignoring the -1). + if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/true, Min, + Subscripts[0])) + return false; + return true; } diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll index 9e6a4221f8eda..7e5c5142dccbc 100644 --- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll +++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll @@ -11,7 +11,7 @@ define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) !kernel_arg_addr_space !2 ! ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes. ; CHECK-NEXT: ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: %tmp5 = load float, ptr %arrayidx4, align 4 ; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc> diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll index e1ad1c55313a4..e5d2806101926 100644 --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll @@ -16,14 +16,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: store double %val, ptr %arrayidx, align 8 ; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j> ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll index d5213e5afb33c..f5f0628ede937 100644 --- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll +++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll @@ -16,7 +16,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll index 011dc40697cb5..f768002dd9e41 100644 --- a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll +++ b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll @@ -19,14 +19,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; CHECK-EMPTY: ; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx1, align 8 ; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j> ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%n] with elements of 8 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.i>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i diff --git a/llvm/test/Analysis/Delinearization/validation_large_size.ll b/llvm/test/Analysis/Delinearization/validation_large_size.ll index f78e68f60569a..1117399a47364 100644 --- a/llvm/test/Analysis/Delinearization/validation_large_size.ll +++ b/llvm/test/Analysis/Delinearization/validation_large_size.ll @@ -1,12 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 ; RUN: opt < %s -passes='print<delinearization>' --delinearize-use-fixed-size-array-heuristic -disable-output 2>&1 | FileCheck %s -; FIXME: When considering an array as a function from subcripts to addresses, -; it should be injective. That is, different subscript tuples should map to -; different addresses. Currently, delinearization doesn't guarantee this -; property, especially when the inferred array size is very large so that the -; product of dimensions may overflow. The delinearization validation should -; consider such cases as invalid. +; When considering an array as a function from subcripts to addresses, it +; should be injective. That is, different subscript tuples should map to +; different addresses. ; for (i = 0; i < (1ULL << 60); i++) ; for (j = 0; j < 256; j++) @@ -23,7 +20,7 @@ define void @large_size_fixed(ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][256] with elements of 1 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: br label %for.i.header @@ -75,7 +72,7 @@ define void @large_size_parametric(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Base offset: %A ; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 1 bytes. ; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k.header>] -; CHECK-NEXT: Delinearization validation: Succeeded +; CHECK-NEXT: Delinearization validation: Failed ; entry: %guard.i = icmp sgt i64 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll index 8f94a455d3724..130b9930cfdf5 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll @@ -13,11 +13,11 @@ target triple = "thumbv8m.main-arm-none-eabi" define void @t1(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't1' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - consistent anti [0 0 0|<]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -78,7 +78,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t2(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't2' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -145,7 +145,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t3(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't3' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -212,7 +212,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t4(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't4' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -279,7 +279,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t5(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't5' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 @@ -346,11 +346,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t6(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't6' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [-1 0 0]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -414,11 +414,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't7' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [1 0 0]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 @@ -482,11 +482,11 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3, define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) { ; CHECK-LABEL: 't8' ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - input [* * *]! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - consistent anti [0 0 1]! +; CHECK-NEXT: da analyze - anti [* * *|<]! ; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: %cmp49 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll index 069a540ea0295..0ff9c05c884ff 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll @@ -100,7 +100,7 @@ define void @linearized_accesses(i64 %n, i64 %m, i64 %o, ptr %A) { ; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 ; CHECK-NEXT: da analyze - output [* * *|<]! ; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - output [* * *]! ; entry: br label %for.i diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll index 19cef4537a769..16e0e7bccaaf5 100644 --- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll @@ -536,9 +536,13 @@ for.end: ; preds = %for.body ;; A[i] = 0; define void @strong11(ptr %A) nounwind uwtable ssp { -; CHECK-LABEL: 'strong11' -; CHECK-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 -; CHECK-NEXT: da analyze - consistent output [0 S]! +; CHECK-ALL-LABEL: 'strong11' +; CHECK-ALL-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 +; CHECK-ALL-NEXT: da analyze - none! +; +; CHECK-STRONG-SIV-LABEL: 'strong11' +; CHECK-STRONG-SIV-NEXT: Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4 +; CHECK-STRONG-SIV-NEXT: da analyze - consistent output [0 S]! ; entry: br label %for.cond1.preheader diff --git a/llvm/test/Transforms/LICM/lnicm.ll b/llvm/test/Transforms/LICM/lnicm.ll index 814f964666305..e331ab7d39e83 100644 --- a/llvm/test/Transforms/LICM/lnicm.ll +++ b/llvm/test/Transforms/LICM/lnicm.ll @@ -3,6 +3,9 @@ ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(lnicm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LNICM ; RUN: opt -aa-pipeline=basic-aa -passes='loop-mssa(licm),loop(loop-interchange)' -cache-line-size=64 -S %s | FileCheck %s --check-prefixes LICM +; XFAIL: * +; Loop interchange currently fails due to a failure in dependence analysis. + ; This test represents the following function: ; void test(int n, int m, int x[m][n], int y[n], int *z) { ; for (int k = 0; k < n; k++) { `````````` </details> https://github.com/llvm/llvm-project/pull/169902 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
