llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-transforms Author: None (github-actions[bot]) <details> <summary>Changes</summary> resolves llvm/llvm-project#<!-- -->79137 --- Full diff: https://github.com/llvm/llvm-project/pull/79561.diff 3 Files Affected: - (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+46-7) - (modified) llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll (+10-1) - (added) llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll (+78) ``````````diff diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 7e67c90152829dc..dd6b88fee415a7a 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -657,16 +657,18 @@ class AccessAnalysis { AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, MemoryDepChecker::DepCandidates &DA, - PredicatedScalarEvolution &PSE) - : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) { + PredicatedScalarEvolution &PSE, + SmallPtrSetImpl<MDNode *> &LoopAliasScopes) + : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), + LoopAliasScopes(LoopAliasScopes) { // We're analyzing dependences across loop iterations. BAA.enableCrossIterationMode(); } /// Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { - Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); + Value *Ptr = const_cast<Value *>(Loc.Ptr); + AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); if (IsReadOnly) ReadOnlyPtr.insert(Ptr); @@ -674,8 +676,8 @@ class AccessAnalysis { /// Register a store. void addStore(MemoryLocation &Loc, Type *AccessTy) { - Value *Ptr = const_cast<Value*>(Loc.Ptr); - AST.add(Loc.getWithNewSize(LocationSize::beforeOrAfterPointer())); + Value *Ptr = const_cast<Value *>(Loc.Ptr); + AST.add(adjustLoc(Loc)); Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); } @@ -731,6 +733,32 @@ class AccessAnalysis { private: typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap; + /// Adjust the MemoryLocation so that it represents accesses to this + /// location across all iterations, rather than a single one. + MemoryLocation adjustLoc(MemoryLocation Loc) const { + // The accessed location varies within the loop, but remains within the + // underlying object. + Loc.Size = LocationSize::beforeOrAfterPointer(); + Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope); + Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias); + return Loc; + } + + /// Drop alias scopes that are only valid within a single loop iteration. + MDNode *adjustAliasScopeList(MDNode *ScopeList) const { + if (!ScopeList) + return nullptr; + + // For the sake of simplicity, drop the whole scope list if any scope is + // iteration-local. + if (any_of(ScopeList->operands(), [&](Metadata *Scope) { + return LoopAliasScopes.contains(cast<MDNode>(Scope)); + })) + return nullptr; + + return ScopeList; + } + /// Go over all memory access and check whether runtime pointer checks /// are needed and build sets of dependency check candidates. void processMemAccesses(); @@ -775,6 +803,10 @@ class AccessAnalysis { PredicatedScalarEvolution &PSE; DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects; + + /// Alias scopes that are declared inside the loop, and as such not valid + /// across iterations. + SmallPtrSetImpl<MDNode *> &LoopAliasScopes; }; } // end anonymous namespace @@ -2283,6 +2315,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, // Holds the Load and Store instructions. SmallVector<LoadInst *, 16> Loads; SmallVector<StoreInst *, 16> Stores; + SmallPtrSet<MDNode *, 8> LoopAliasScopes; // Holds all the different accesses in the loop. unsigned NumReads = 0; @@ -2326,6 +2359,11 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, if (HasComplexMemInst) continue; + // Record alias scopes defined inside the loop. + if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) + for (Metadata *Op : Decl->getScopeList()->operands()) + LoopAliasScopes.insert(cast<MDNode>(Op)); + // Many math library functions read the rounding mode. We will only // vectorize a loop if it contains known function calls that don't set // the flag. Therefore, it is safe to ignore this read from memory. @@ -2407,7 +2445,8 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, } MemoryDepChecker::DepCandidates DependentAccesses; - AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE); + AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE, + LoopAliasScopes); // Holds the analyzed pointers. We don't want to call getUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once diff --git a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll index 98bb5f99a40a1e2..fb296f5089422db 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/noalias-scope-decl.ll @@ -7,8 +7,17 @@ define void @test_scope_in_loop(ptr %arg, i64 %num) { ; CHECK-LABEL: 'test_scope_in_loop' ; CHECK-NEXT: loop: -; CHECK-NEXT: Memory dependences are safe +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: +; CHECK-NEXT: Backward: +; CHECK-NEXT: %load.prev = load i8, ptr %prev.ptr, align 1, !alias.scope !0, !noalias !3 -> +; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3 +; CHECK-EMPTY: +; CHECK-NEXT: Forward: +; CHECK-NEXT: %load.cur = load i8, ptr %cur.ptr, align 1, !alias.scope !3 -> +; CHECK-NEXT: store i8 %add, ptr %cur.ptr, align 1, !alias.scope !3 +; CHECK-EMPTY: ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Grouped accesses: ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll new file mode 100644 index 000000000000000..5c85c0d21f59f74 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll @@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S -O3 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define internal void @acc(ptr noalias noundef %val, ptr noalias noundef %prev) { +entry: + %0 = load i8, ptr %prev, align 1 + %conv = zext i8 %0 to i32 + %1 = load i8, ptr %val, align 1 + %conv1 = zext i8 %1 to i32 + %add = add nsw i32 %conv1, %conv + %conv2 = trunc i32 %add to i8 + store i8 %conv2, ptr %val, align 1 + ret void +} + +; This loop should not get vectorized. +define void @accsum(ptr noundef %vals, i64 noundef %num) #0 { +; CHECK-LABEL: define void @accsum( +; CHECK-SAME: ptr nocapture noundef [[VALS:%.*]], i64 noundef [[NUM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i64 [[NUM]], 1 +; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[LOAD_INITIAL:%.*]] = load i8, ptr [[VALS]], align 1 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]] +; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: [[INC]] = add nuw i64 [[I_02]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; +entry: + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %i.0 = phi i64 [ 1, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i64 %i.0, %num + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + br label %for.end + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i8, ptr %vals, i64 %i.0 + %sub = sub i64 %i.0, 1 + %arrayidx1 = getelementptr inbounds i8, ptr %vals, i64 %sub + call void @acc(ptr noundef %arrayidx, ptr noundef %arrayidx1) + br label %for.inc + +for.inc: ; preds = %for.body + %inc = add i64 %i.0, 1 + br label %for.cond + +for.end: ; preds = %for.cond.cleanup + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"} +;. +; CHECK: [[META0]] = !{[[META1:![0-9]+]]} +; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"} +; CHECK: [[META2]] = distinct !{[[META2]], !"acc"} +; CHECK: [[META3]] = !{[[META4:![0-9]+]]} +; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"} +;. `````````` </details> https://github.com/llvm/llvm-project/pull/79561 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits