https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/81289
>From 90639e9131670863ebb4c199a9861b2b0094d601 Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Fri, 9 Feb 2024 15:17:09 +0000 Subject: [PATCH 1/2] [SROA] Use !tbaa instead of !tbaa.struct if op matches field. If a split memory access introduced by SROA accesses precisely a single field of the original operation's !tbaa.struct, use the !tbaa tag for the accessed field directly instead of the full !tbaa.struct. InstCombine already had a similar logic. Motivation for this and follow-on patches is to improve codegen for libc++, where using memcpy limits optimizations, like vectorization for code iteration over std::vector<std::complex<float>>: https://godbolt.org/z/f3vqYos3c Depends on https://github.com/llvm/llvm-project/pull/81285. --- llvm/include/llvm/IR/Metadata.h | 2 + llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 13 ++++++ llvm/lib/Transforms/Scalar/SROA.cpp | 48 ++++++++++++++------ llvm/test/Transforms/SROA/tbaa-struct2.ll | 21 ++++----- llvm/test/Transforms/SROA/tbaa-struct3.ll | 16 +++---- 5 files changed, 67 insertions(+), 33 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 6f23ac44dee968..33363a271d4823 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -849,6 +849,8 @@ struct AAMDNodes { /// If his AAMDNode has !tbaa.struct and \p AccessSize matches the size of the /// field at offset 0, get the TBAA tag describing the accessed field. AAMDNodes adjustForAccess(unsigned AccessSize); + AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL); }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index bfd70414c0340c..b2dc451d581939 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -833,3 +833,16 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { } return New; } + +AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, + const DataLayout &DL) { + + AAMDNodes New = shift(Offset); + if (!DL.typeSizeEqualsStoreSize(AccessTy)) + return New; + TypeSize Size = DL.getTypeStoreSize(AccessTy); + if (Size.isScalable()) + return New; + + return New.adjustForAccess(Size.getKnownMinValue()); +} diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 138dc38b5c14ce..f24cbbc1fe0591 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2914,7 +2914,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { // Do this after copyMetadataForLoad() to preserve the TBAA shift. if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); // Try to preserve nonnull metadata V = NewLI; @@ -2936,7 +2937,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); if (AATags) - NewLI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + NewLI->setAAMetadata(AATags.adjustForAccess( + NewBeginOffset - BeginOffset, NewLI->getType(), DL)); + if (LI.isVolatile()) NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID()); NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access, @@ -3011,7 +3014,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); Pass.DeadInsts.push_back(&SI); // NOTE: Careful to use OrigV rather than V. @@ -3038,7 +3042,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI, Store, Store->getPointerOperand(), @@ -3097,8 +3102,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { } NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - NewSI->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -3280,8 +3287,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + V->getType(), DL)); + } migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getPointerOperand(), V, DL); @@ -3486,7 +3495,8 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); if (AATags) - Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Load->getType(), DL)); Src = Load; } @@ -3507,8 +3517,10 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) - Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, + Src->getType(), DL)); + } APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); if (IsDest) { @@ -3836,7 +3848,8 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); if (AATags && GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset)) - Load->setAAMetadata(AATags.shift(Offset.getZExtValue())); + Load->setAAMetadata( + AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL)); Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); LLVM_DEBUG(dbgs() << " to: " << *Load << "\n"); @@ -3887,8 +3900,10 @@ class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { APInt Offset( DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0); GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset); - if (AATags) - Store->setAAMetadata(AATags.shift(Offset.getZExtValue())); + if (AATags) { + Store->setAAMetadata(AATags.adjustForAccess( + Offset.getZExtValue(), ExtractValue->getType(), DL)); + } // migrateDebugInfo requires the base Alloca. Walk to it from this gep. // If we cannot (because there's an intervening non-const or unbounded @@ -4542,6 +4557,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { Value *StoreBasePtr = SI->getPointerOperand(); IRB.SetInsertPoint(SI); + AAMDNodes AATags = SI->getAAMetadata(); LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n"); @@ -4561,6 +4577,10 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group, LLVMContext::MD_DIAssignID}); + + if (AATags) + PStore->setAAMetadata( + AATags.adjustForAccess(PartOffset, PLoad->getType(), DL)); LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n"); } diff --git a/llvm/test/Transforms/SROA/tbaa-struct2.ll b/llvm/test/Transforms/SROA/tbaa-struct2.ll index 1fd37e82d67775..02c99a2b329457 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct2.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct2.ll @@ -13,9 +13,9 @@ define double @bar(ptr %wishart) { ; CHECK-NEXT: [[TMP_SROA_3:%.*]] = alloca [4 x i8], align 4 ; CHECK-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[WISHART:%.*]], align 8, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_2_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 8 -; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] +; CHECK-NEXT: [[TMP_SROA_2_0_COPYLOAD:%.*]] = load i32, ptr [[TMP_SROA_2_0_WISHART_SROA_IDX]], align 8, !tbaa [[TBAA5:![0-9]+]] ; CHECK-NEXT: [[TMP_SROA_3_0_WISHART_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[WISHART]], i64 12 -; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT8:![0-9]+]] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP_SROA_3]], ptr align 4 [[TMP_SROA_3_0_WISHART_SROA_IDX]], i64 4, i1 false), !tbaa.struct [[TBAA_STRUCT7:![0-9]+]] ; CHECK-NEXT: [[CALL:%.*]] = call double @subcall(double [[TMP_SROA_0_0_COPYLOAD]], i32 [[TMP_SROA_2_0_COPYLOAD]]) ; CHECK-NEXT: ret double [[CALL]] ; @@ -38,15 +38,14 @@ define double @bar(ptr %wishart) { ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, !1, i64 8, i64 4, !5} -; CHECK: [[META1:![0-9]+]] = !{!2, !2, i64 0} -; CHECK: [[META2:![0-9]+]] = !{!"double", !3, i64 0} -; CHECK: [[META3:![0-9]+]] = !{!"omnipotent char", !4, i64 0} -; CHECK: [[META4:![0-9]+]] = !{!"Simple C++ TBAA"} -; CHECK: [[META5:![0-9]+]] = !{!6, !6, i64 0} -; CHECK: [[META6:![0-9]+]] = !{!"int", !3, i64 0} -; CHECK: [[TBAA_STRUCT7]] = !{i64 0, i64 4, !5} -; CHECK: [[TBAA_STRUCT8]] = !{} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 8, [[META1:![0-9]+]], i64 8, i64 4, [[TBAA5]]} +; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[META2]] = !{!"double", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} +; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0} +; CHECK: [[META6]] = !{!"int", [[META3]], i64 0} +; CHECK: [[TBAA_STRUCT7]] = !{} ;. ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index 4910e0e07ae380..603e7d708647fc 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -7,9 +7,9 @@ define void @load_store_transfer_split_struct_tbaa_2_float(ptr dereferenceable(2 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[A]] to i32 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[B]] to i32 -; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4 +; CHECK-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] ; CHECK-NEXT: [[RES_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RES]], i64 4 -; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4 +; CHECK-NEXT: store i32 [[TMP1]], ptr [[RES_SROA_IDX]], align 4, !tbaa [[TBAA1:![0-9]+]] ; CHECK-NEXT: [[P:%.*]] = load ptr, ptr [[RES]], align 8 ; CHECK-NEXT: ret void ; @@ -29,9 +29,9 @@ define void @memcpy_transfer(ptr dereferenceable(24) %res, float %a, float %b) { ; CHECK-SAME: ptr dereferenceable(24) [[RES:%.*]], float [[A:%.*]], float [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[L_PTR:%.*]] = load ptr, ptr [[RES]], align 8 -; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0:![0-9]+]] +; CHECK-NEXT: store float [[A]], ptr [[L_PTR]], align 1, !tbaa.struct [[TBAA_STRUCT0]] ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 -; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] +; CHECK-NEXT: store float [[B]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: ret void ; entry: @@ -53,7 +53,7 @@ define void @memcpy_transfer_tbaa_field_and_size_do_not_align(ptr dereferenceabl ; CHECK-NEXT: [[TMP_SROA_2_0_L_PTR_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[L_PTR]], i64 4 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast float [[B]] to i32 ; CHECK-NEXT: [[TMP_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[TMP0]] to i16 -; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5]] +; CHECK-NEXT: store i16 [[TMP_SROA_2_0_EXTRACT_TRUNC]], ptr [[TMP_SROA_2_0_L_PTR_SROA_IDX]], align 1, !tbaa.struct [[TBAA_STRUCT5:![0-9]+]] ; CHECK-NEXT: ret void ; entry: @@ -98,10 +98,10 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias !3 = !{!"omnipotent char", !4, i64 0} !4 = !{!"Simple C++ TBAA"} ;. -; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[META1:![0-9]+]], i64 4, i64 4, [[META1]]} -; CHECK: [[META1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} +; CHECK: [[TBAA_STRUCT0]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 4, [[TBAA1]]} +; CHECK: [[TBAA1]] = !{[[META2:![0-9]+]], [[META2]], i64 0} ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} -; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[META1]]} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]} ;. >From 137fe547415c6196498405f23e11f2fc4831675a Mon Sep 17 00:00:00 2001 From: Florian Hahn <f...@fhahn.com> Date: Thu, 15 Feb 2024 21:30:21 +0000 Subject: [PATCH 2/2] !fixup update new test cases, add handle missing cases. --- llvm/include/llvm/IR/Metadata.h | 1 + llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 6 ++- llvm/lib/Transforms/Scalar/SROA.cpp | 16 +++--- llvm/test/Transforms/SROA/tbaa-struct3.ll | 51 ++++++++++++-------- 4 files changed, 46 insertions(+), 28 deletions(-) diff --git a/llvm/include/llvm/IR/Metadata.h b/llvm/include/llvm/IR/Metadata.h index 94065b52740a5d..da6744fdd09166 100644 --- a/llvm/include/llvm/IR/Metadata.h +++ b/llvm/include/llvm/IR/Metadata.h @@ -851,6 +851,7 @@ struct AAMDNodes { AAMDNodes adjustForAccess(unsigned AccessSize); AAMDNodes adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL); + AAMDNodes adjustForAccess(size_t Offset, unsigned AccessSize); }; // Specialize DenseMapInfo for AAMDNodes. diff --git a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index b2dc451d581939..a8c881fce5fd4d 100644 --- a/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -836,7 +836,6 @@ AAMDNodes AAMDNodes::adjustForAccess(unsigned AccessSize) { AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, const DataLayout &DL) { - AAMDNodes New = shift(Offset); if (!DL.typeSizeEqualsStoreSize(AccessTy)) return New; @@ -846,3 +845,8 @@ AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, Type *AccessTy, return New.adjustForAccess(Size.getKnownMinValue()); } + +AAMDNodes AAMDNodes::adjustForAccess(size_t Offset, unsigned AccessSize) { + AAMDNodes New = shift(Offset); + return New.adjustForAccess(AccessSize); +} diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index f24cbbc1fe0591..6c8785d52c4eab 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -2936,6 +2936,7 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { LoadInst *NewLI = IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy), getSliceAlign(), LI.isVolatile(), LI.getName()); + if (AATags) NewLI->setAAMetadata(AATags.adjustForAccess( NewBeginOffset - BeginOffset, NewLI->getType(), DL)); @@ -3102,10 +3103,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { } NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, V->getType(), DL)); - } if (SI.isVolatile()) NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID()); if (NewSI->isAtomic()) @@ -3207,12 +3207,14 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { // a single value type, just emit a memset. if (!CanContinue) { Type *SizeTy = II.getLength()->getType(); - Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset); + unsigned Sz = NewEndOffset - NewBeginOffset; + Constant *Size = ConstantInt::get(SizeTy, Sz); MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet( getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size, MaybeAlign(getSliceAlign()), II.isVolatile())); if (AATags) - New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset)); + New->setAAMetadata( + AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz)); migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getRawDest(), nullptr, DL); @@ -3287,10 +3289,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile()); New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, V->getType(), DL)); - } migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II, New, New->getPointerOperand(), V, DL); @@ -3517,10 +3518,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> { IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile())); Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access, LLVMContext::MD_access_group}); - if (AATags) { + if (AATags) Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset, Src->getType(), DL)); - } APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0); if (IsDest) { diff --git a/llvm/test/Transforms/SROA/tbaa-struct3.ll b/llvm/test/Transforms/SROA/tbaa-struct3.ll index a3414e8ace538d..2a151c01adfcc0 100644 --- a/llvm/test/Transforms/SROA/tbaa-struct3.ll +++ b/llvm/test/Transforms/SROA/tbaa-struct3.ll @@ -100,7 +100,7 @@ define void @store_vector_part_first(ptr %y2, float %f) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) ; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT6:![0-9]+]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 8 -; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa.struct [[TBAA_STRUCT5]] +; CHECK-NEXT: store float [[F]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 8, !tbaa [[TBAA1]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -118,7 +118,7 @@ define void @store_vector_part_second(ptr %y2, float %f) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) ; CHECK-NEXT: store float [[F]], ptr [[Y2]], align 8, !tbaa.struct [[TBAA_STRUCT9:![0-9]+]] ; CHECK-NEXT: [[X7_SROA_2_0_Y2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[Y2]], i64 4 -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[X7_SROA_2_0_Y2_SROA_IDX]], align 4, !tbaa [[TBAA7:![0-9]+]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float, float, float } @@ -134,7 +134,7 @@ define void @store_vector_single(ptr %y2, float %f) { ; CHECK-LABEL: define void @store_vector_single( ; CHECK-SAME: ptr [[Y2:%.*]], float [[F:%.*]]) { ; CHECK-NEXT: [[V_1:%.*]] = call <2 x float> @foo(ptr [[Y2]]) -; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT11:![0-9]+]] +; CHECK-NEXT: store <2 x float> [[V_1]], ptr [[Y2]], align 4, !tbaa.struct [[TBAA_STRUCT10:![0-9]+]] ; CHECK-NEXT: ret void ; %x7 = alloca { float, float } @@ -161,8 +161,8 @@ define void @memset(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_0_SRC_SROA_IDX]], i32 10, i1 false) ; CHECK-NEXT: store i16 1, ptr [[A_SROA_3]], align 2 ; CHECK-NEXT: [[A_SROA_0_1_A_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]] -; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_1_A_1_SROA_IDX2]], i8 42, i32 6, i1 false), !tbaa.struct [[TBAA_STRUCT11:![0-9]+]] +; CHECK-NEXT: store i16 10794, ptr [[A_SROA_3]], align 2, !tbaa [[TBAA1]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 7, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 7 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A_SROA_3]], align 2 @@ -199,8 +199,8 @@ define void @memset2(ptr %dst, ptr align 8 %src) { ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 2 [[A_SROA_4_0_SRC_SROA_IDX]], i32 90, i1 false) ; CHECK-NEXT: store i8 1, ptr [[A_SROA_3]], align 1 ; CHECK-NEXT: [[A_SROA_0_202_A_202_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 202 -; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT14:![0-9]+]] -; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]] +; CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 1 [[A_SROA_0_202_A_202_SROA_IDX2]], i8 42, i32 7, i1 false), !tbaa.struct [[TBAA_STRUCT12:![0-9]+]] +; CHECK-NEXT: store i8 42, ptr [[A_SROA_3]], align 1, !tbaa [[TBAA7]] ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 209, i1 true) ; CHECK-NEXT: [[A_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 209 ; CHECK-NEXT: [[A_SROA_3_0_A_SROA_3_0_COPYLOAD1:%.*]] = load volatile i8, ptr [[A_SROA_3]], align 1 @@ -240,7 +240,7 @@ define void @slice_store_v2i8_1(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: [[A_SROA_2_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 6 ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_COPYLOAD:%.*]] = load <2 x i8>, ptr [[A_SROA_2_0_SRC_SROA_IDX]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_COPYLOAD]], ptr [[A_SROA_2_SROA_0]], align 4 -; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]] +; CHECK-NEXT: store <2 x i8> bitcast (<1 x i16> <i16 123> to <2 x i8>), ptr [[A_SROA_2_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT13:![0-9]+]] ; CHECK-NEXT: [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_2_SROA_0]], align 4 ; CHECK-NEXT: store <2 x i8> [[A_SROA_2_SROA_0_0_A_SROA_2_SROA_0_0_A_SROA_2_6_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 1 [[A_SROA_0]], i32 6, i1 true) @@ -279,8 +279,8 @@ define void @slice_store_v2i8_2(ptr %dst, ptr %dst.2, ptr %src) { ; CHECK-NEXT: store i8 [[A_SROA_0_SROA_4_1_COPYLOAD]], ptr [[A_SROA_0_SROA_4]], align 1 ; CHECK-NEXT: [[A_SROA_4_1_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 3 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[A_SROA_4]], ptr align 1 [[A_SROA_4_1_SRC_SROA_IDX]], i32 5, i1 false) -; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]] -; CHECK-NEXT: store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa.struct [[TBAA_STRUCT18:![0-9]+]] +; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[A_SROA_0_SROA_1]], align 2, !tbaa.struct [[TBAA_STRUCT14:![0-9]+]] +; CHECK-NEXT: store i8 0, ptr [[A_SROA_0_SROA_4]], align 1, !tbaa [[TBAA1]] ; CHECK-NEXT: [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4:%.*]] = load <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2 ; CHECK-NEXT: store <2 x i8> [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_A_SROA_0_1_V_4]], ptr [[DST_2]], align 2 ; CHECK-NEXT: [[A_SROA_0_SROA_1_0_A_SROA_0_SROA_1_1_COPYLOAD3:%.*]] = load volatile <2 x i8>, ptr [[A_SROA_0_SROA_1]], align 2 @@ -317,7 +317,7 @@ define double @tbaa_struct_load(ptr %src, ptr %dst) { ; CHECK-NEXT: [[TMP_SROA_3_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 8 ; CHECK-NEXT: [[TMP_SROA_3_0_COPYLOAD:%.*]] = load i64, ptr [[TMP_SROA_3_0_SRC_SROA_IDX]], align 8 ; CHECK-NEXT: store i64 [[TMP_SROA_3_0_COPYLOAD]], ptr [[TMP_SROA_3]], align 8 -; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT10]] +; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_LG:%.*]] = load double, ptr [[TMP_SROA_0]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1:%.*]] = load volatile double, ptr [[TMP_SROA_0]], align 8 ; CHECK-NEXT: store volatile double [[TMP_SROA_0_0_TMP_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 8 ; CHECK-NEXT: [[TMP_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 8 @@ -356,7 +356,7 @@ define i32 @shorten_integer_store_multiple_fields(ptr %dst, ptr %dst.2, ptr %src ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa.struct [[TBAA_STRUCT19:![0-9]+]] +; CHECK-NEXT: store i32 123, ptr [[A_SROA_0]], align 4, !tbaa [[TBAA7]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_COPYLOAD:%.*]] = load volatile i32, ptr [[A_SROA_0]], align 4 ; CHECK-NEXT: store volatile i32 [[A_SROA_0_0_A_SROA_0_0_COPYLOAD]], ptr [[DST]], align 1 @@ -393,7 +393,7 @@ define <2 x i16> @shorten_vector_store_single_fields(ptr %dst, ptr %dst.2, ptr % ; CHECK-SAME: ptr [[DST:%.*]], ptr [[DST_2:%.*]], ptr [[SRC:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i32>, align 8 -; CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT19]] +; CHECK-NEXT: store <2 x i32> <i32 1, i32 2>, ptr [[A_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT15:![0-9]+]] ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load <2 x i16>, ptr [[A_SROA_0]], align 8 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[DST]], ptr align 8 [[A_SROA_0]], i32 4, i1 true) ; CHECK-NEXT: ret <2 x i16> [[A_SROA_0_0_A_SROA_0_0_L]] @@ -429,11 +429,11 @@ define i32 @split_load_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[A3_SROA_5_0_SRC_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 9 ; CHECK-NEXT: [[A3_SROA_5_0_COPYLOAD:%.*]] = load i8, ptr [[A3_SROA_5_0_SRC_SROA_IDX]], align 1 ; CHECK-NEXT: store i8 [[A3_SROA_5_0_COPYLOAD]], ptr [[A3_SROA_5]], align 1 -; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20:![0-9]+]] +; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD:%.*]] = load i16, ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16:![0-9]+]] ; CHECK-NEXT: [[LOAD4_FCA_0_INSERT:%.*]] = insertvalue { i16, float, i8 } poison, i16 [[A3_SROA_0_0_A3_SROA_0_0_LOAD4_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21:![0-9]+]] +; CHECK-NEXT: [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD:%.*]] = load float, ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17:![0-9]+]] ; CHECK-NEXT: [[LOAD4_FCA_1_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_0_INSERT]], float [[A3_SROA_33_0_A3_SROA_33_4_LOAD4_FCA_1_LOAD]], 1 -; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]] +; CHECK-NEXT: [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD:%.*]] = load i8, ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[LOAD4_FCA_2_INSERT:%.*]] = insertvalue { i16, float, i8 } [[LOAD4_FCA_1_INSERT]], i8 [[A3_SROA_4_0_A3_SROA_4_8_LOAD4_FCA_2_LOAD]], 2 ; CHECK-NEXT: [[UNWRAP2:%.*]] = extractvalue { i16, float, i8 } [[LOAD4_FCA_2_INSERT]], 1 ; CHECK-NEXT: [[VALCAST2:%.*]] = bitcast float [[UNWRAP2]] to i32 @@ -492,11 +492,11 @@ define i32 @split_store_with_tbaa_struct(i32 %x, ptr %src, ptr %dst) { ; CHECK-NEXT: [[I_2:%.*]] = insertvalue { i16, float, i8 } [[I_1]], float 3.000000e+00, 1 ; CHECK-NEXT: [[I_3:%.*]] = insertvalue { i16, float, i8 } [[I_2]], i8 99, 2 ; CHECK-NEXT: [[I_3_FCA_0_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 0 -; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT20]] +; CHECK-NEXT: store i16 [[I_3_FCA_0_EXTRACT]], ptr [[A3_SROA_0]], align 8, !tbaa.struct [[TBAA_STRUCT16]] ; CHECK-NEXT: [[I_3_FCA_1_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 1 -; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT21]] +; CHECK-NEXT: store float [[I_3_FCA_1_EXTRACT]], ptr [[A3_SROA_33]], align 4, !tbaa.struct [[TBAA_STRUCT17]] ; CHECK-NEXT: [[I_3_FCA_2_EXTRACT:%.*]] = extractvalue { i16, float, i8 } [[I_3]], 2 -; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa.struct [[TBAA_STRUCT15]] +; CHECK-NEXT: store i8 [[I_3_FCA_2_EXTRACT]], ptr [[A3_SROA_4]], align 8, !tbaa [[TBAA7]] ; CHECK-NEXT: [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1:%.*]] = load volatile i16, ptr [[A3_SROA_0]], align 8 ; CHECK-NEXT: store volatile i16 [[A3_SROA_0_0_A3_SROA_0_0_COPYLOAD1]], ptr [[DST]], align 1 ; CHECK-NEXT: [[A3_SROA_3_0_DST_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 2 @@ -553,4 +553,17 @@ declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias ; CHECK: [[META2]] = !{!"float", [[META3:![0-9]+]], i64 0} ; CHECK: [[META3]] = !{!"omnipotent char", [[META4:![0-9]+]], i64 0} ; CHECK: [[META4]] = !{!"Simple C++ TBAA"} +; CHECK: [[TBAA_STRUCT5]] = !{i64 0, i64 4, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT6]] = !{i64 0, i64 8, [[TBAA7]], i64 8, i64 4, [[TBAA1]]} +; CHECK: [[TBAA7]] = !{[[META8:![0-9]+]], [[META8]], i64 0} +; CHECK: [[META8]] = !{!"v2f32", [[META3]], i64 0} +; CHECK: [[TBAA_STRUCT9]] = !{i64 0, i64 4, [[TBAA1]], i64 4, i64 8, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT10]] = !{i64 0, i64 8, [[TBAA7]], i64 4, i64 8, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT11]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 6, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT12]] = !{i64 0, i64 7, [[TBAA7]], i64 7, i64 1, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT13]] = !{i64 0, i64 2, [[TBAA1]], i64 2, i64 2, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT14]] = !{i64 0, i64 3, [[TBAA1]]} +; CHECK: [[TBAA_STRUCT15]] = !{i64 0, i64 4, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT16]] = !{i64 0, i64 2, [[TBAA7]], i64 4, i64 4, [[TBAA7]], i64 8, i64 1, [[TBAA7]]} +; CHECK: [[TBAA_STRUCT17]] = !{i64 0, i64 4, [[TBAA7]], i64 4, i64 1, [[TBAA7]]} ;. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits