llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Finn Plummer (inbelic)

<details>
<summary>Changes</summary>

This PR implements an initial, simple type inference used to improve the type
of the `load`/`store` that replaces a `memcpy` when `%dest` and/or `%src` is an
`alloca` instruction.

As noted within `SimplifyAnyMemTransfer`, there exists an opportunity to find a 
better type than the default integer type.

By inferring the type, we allow the value to be unpacked, which enables
further optimization opportunities that are otherwise missed. It
also has the additional benefit of generating a more understandable
canonicalized form of the IR.

- Updates `InstCombineCalls::SimplifyAnyMemTransfer` to add type inference
- Adds test to demonstrate unpacking behaviour
- Adds test to ensure we don't replace with an invalid alloca type
- Updates to existing tests all demonstrate previously missed unpacking
opportunities or the conversion of a primitive type

Incidentally, this resolves https://github.com/llvm/llvm-project/issues/165753.
For more context on the HLSL-specific side, please refer to [this
comment](https://github.com/llvm/llvm-project/pull/169384#issuecomment-3577589275)
and the preceding PR description.

---
Full diff: https://github.com/llvm/llvm-project/pull/169966.diff


6 Files Affected:

- (modified) clang/test/CodeGenCXX/auto-var-init.cpp (+13-4) 
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+31-4) 
- (modified) llvm/test/Transforms/InstCombine/alloca.ll (+20-8) 
- (modified) llvm/test/Transforms/InstCombine/memcpy_alloca.ll (+59) 
- (modified) 
llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll 
(+10-13) 
- (modified) llvm/test/Transforms/PhaseOrdering/swap-promotion.ll (+6-2) 


``````````diff
diff --git a/clang/test/CodeGenCXX/auto-var-init.cpp 
b/clang/test/CodeGenCXX/auto-var-init.cpp
index 67bc5d417bce9..bc7290b345651 100644
--- a/clang/test/CodeGenCXX/auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/auto-var-init.cpp
@@ -79,7 +79,7 @@ struct nullinit { char* null = nullptr; };
 // ZERO-O0: @__const.test_padded_custom.custom = private unnamed_addr constant 
{ i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 4
 // PATTERN-O1-NOT: @__const.test_padded_uninit.uninit
 // PATTERN-O1-NOT: @__const.test_padded_custom.custom
-// ZERO-O1-NOT: @__const.test_padded_custom.custom
+// ZERO-O1: @__const.test_padded_custom.custom = private unnamed_addr constant 
{ i8, [3 x i8], i32 } { i8 42, [3 x i8] zeroinitializer, i32 13371337 }, align 8
 struct padded { char c; int i; };
 // PATTERN-O0: @__const.test_paddednullinit_uninit.uninit = private 
unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] 
c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
 // PATTERN-O0: @__const.test_paddednullinit_braces.braces = private 
unnamed_addr constant { i8, [3 x i8], i32 } { i8 [[I8]], [3 x i8] 
c"\[[IC]]\[[IC]]\[[IC]]", i32 [[I32]] }, align 4
@@ -713,6 +713,13 @@ TEST_CUSTOM(padded, padded, { 42, 13371337 });
 // CHECK-NEXT:  call void @llvm.memcpy
 // CHECK-NOT:   !annotation
 // CHECK-NEXT:  call void @{{.*}}used{{.*}}%custom)
+// ZERO-O1:     %custom = alloca %struct.padded, align 4
+// ZERO-O1:     %0 = load %struct.padded, ptr 
@__const.test_padded_custom.custom, align 8
+// ZERO-O1:     %[[I8:.*]] = extractvalue %struct.padded %0, 0
+// ZERO-O1:     store i8 %[[I8]], ptr %custom, align 4
+// ZERO-O1:     %[[I32:.*]] = extractvalue %struct.padded %0, 1
+// ZERO-O1:     %[[GEP:.*]] = getelementptr inbounds nuw i8, ptr %custom, i64 4
+// ZERO-O1:     store i32 %[[I32]], ptr %[[GEP]], align 4
 
 TEST_UNINIT(paddednullinit, paddednullinit);
 // CHECK-LABEL: @test_paddednullinit_uninit()
@@ -1298,7 +1305,9 @@ TEST_CUSTOM(semivolatile, semivolatile, { 0x44444444, 
0x44444444 });
 // PATTERN-O1:       store i32 1145324612, ptr %custom, align 4
 // PATTERN-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr 
%custom, i64 4
 // PATTERN-O1-NEXT:  store i32 1145324612, ptr %[[I]], align 4
-// ZERO-O1:          store i64 4919131752989213764, ptr %custom, align 8
+// ZERO-O1:       store i32 1145324612, ptr %custom, align 4
+// ZERO-O1-NEXT:  %[[I:[^ ]*]] = getelementptr inbounds nuw i8, ptr %custom, 
i64 4
+// ZERO-O1-NEXT:  store i32 1145324612, ptr %[[I]], align 4
 // CHECK-NOT:   !annotation
 
 TEST_UNINIT(semivolatileinit, semivolatileinit);
@@ -1441,7 +1450,7 @@ TEST_CUSTOM(matchingreverse, matchingreverse, { .i = 
0xf00f });
 // CHECK-NOT:   !annotation
 // CHECK-O0:    call void @{{.*}}used{{.*}}%custom)
 // PATTERN-O1:  store i32 61455, ptr %custom, align 4
-// ZERO-O1:     store i32 61455, ptr %custom, align 4
+// ZERO-O1:     store float 0x379E01E000000000, ptr %custom, align 4
 // CHECK-NOT:   !annotation
 
 TEST_UNINIT(unmatched, unmatched);
@@ -1527,7 +1536,7 @@ TEST_CUSTOM(unmatchedfp, unmatchedfp, { .d = 
3.1415926535897932384626433 });
 // CHECK-NOT:   !annotation
 // CHECK-O0:    call void @{{.*}}used{{.*}}%custom)
 // PATTERN-O1:  store double 0x400921FB54442D18, ptr %custom, align 8
-// ZERO-O1:     store i64 4614256656552045848, ptr %custom, align 8
+// ZERO-O1:  store double 0x400921FB54442D18, ptr %custom, align 8
 // CHECK-NOT:   !annotation
 
 TEST_UNINIT(emptyenum, emptyenum);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 92fca90ddb88a..b9c2a6681f687 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -114,6 +114,31 @@ static bool hasUndefSource(AnyMemTransferInst *MI) {
   return isa<AllocaInst>(Src) && Src->hasOneUse();
 }
 
+// Optimistically infer a type from either the Src or Dest. Prefers the Src
+// over the Dest type.
+//
+// Returns the DefaultTy if unable to infer a type, or, if inferred type does
+// not match the size of load/store.
+static Type *inferType(const DataLayout &DL, IntegerType *DefaultTy, Value 
*Src,
+                       Value *Dest) {
+  Type *SrcTy = nullptr;
+  Type *DestTy = nullptr;
+
+  if (auto *SrcAI = dyn_cast<AllocaInst>(Src))
+    SrcTy = SrcAI->getAllocatedType();
+
+  if (auto *DestAI = dyn_cast<AllocaInst>(Dest))
+    DestTy = DestAI->getAllocatedType();
+
+  Type *InferredTy = SrcTy ? SrcTy : DestTy;
+
+  if (InferredTy &&
+      DefaultTy->getPrimitiveSizeInBits() == DL.getTypeSizeInBits(InferredTy))
+    return InferredTy;
+
+  return DefaultTy;
+}
+
 Instruction *InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
   Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
   MaybeAlign CopyDstAlign = MI->getDestAlign();
@@ -169,16 +194,18 @@ Instruction 
*InstCombinerImpl::SimplifyAnyMemTransfer(AnyMemTransferInst *MI) {
     if (*CopyDstAlign < Size || *CopySrcAlign < Size)
       return nullptr;
 
-  // Use an integer load+store unless we can find something better.
-  IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
-
   // If the memcpy has metadata describing the members, see if we can get the
   // TBAA, scope and noalias tags describing our copy.
   AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
 
   Value *Src = MI->getArgOperand(1);
   Value *Dest = MI->getArgOperand(0);
-  LoadInst *L = Builder.CreateLoad(IntType, Src);
+
+  // Use an integer load+store unless we can find something better.
+  IntegerType *IntType = IntegerType::get(MI->getContext(), Size << 3);
+  Type *InferredType = inferType(DL, IntType, Src, Dest);
+
+  LoadInst *L = Builder.CreateLoad(InferredType, Src);
   // Alignment from the mem intrinsic will be better, so use it.
   L->setAlignment(*CopySrcAlign);
   L->setAAMetadata(AACopyMD);
diff --git a/llvm/test/Transforms/InstCombine/alloca.ll 
b/llvm/test/Transforms/InstCombine/alloca.ll
index 15b43e780f149..de8543e333e54 100644
--- a/llvm/test/Transforms/InstCombine/alloca.ll
+++ b/llvm/test/Transforms/InstCombine/alloca.ll
@@ -189,24 +189,36 @@ define void @test9(ptr %a) {
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, 
align 1
-; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
-; CHECK-NEXT:    store i64 [[TMP0]], ptr [[ARGMEM]], align 4
+; CHECK-NEXT:    [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
+; CHECK-NEXT:    [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr 
[[A]], i64 4
+; CHECK-NEXT:    [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], 
align 4
+; CHECK-NEXT:    store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
+; CHECK-NEXT:    [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr 
[[ARGMEM]], i64 4
+; CHECK-NEXT:    store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], 
align 4
 ; CHECK-NEXT:    call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] 
}>) [[ARGMEM]])
 ; CHECK-NEXT:    ret void
 ;
 ; P32-LABEL: @test9(
 ; P32-NEXT:  entry:
 ; P32-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, 
align 1
-; P32-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
-; P32-NEXT:    store i64 [[TMP0]], ptr [[ARGMEM]], align 4
+; P32-NEXT:    [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
+; P32-NEXT:    [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr 
[[A]], i32 4
+; P32-NEXT:    [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], 
align 4
+; P32-NEXT:    store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 4
+; P32-NEXT:    [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr 
[[ARGMEM]], i32 4
+; P32-NEXT:    store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 4
 ; P32-NEXT:    call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] 
}>) [[ARGMEM]])
 ; P32-NEXT:    ret void
 ;
 ; NODL-LABEL: @test9(
 ; NODL-NEXT:  entry:
 ; NODL-NEXT:    [[ARGMEM:%.*]] = alloca inalloca <{ [[STRUCT_TYPE:%.*]] }>, 
align 8
-; NODL-NEXT:    [[TMP0:%.*]] = load i64, ptr [[A:%.*]], align 4
-; NODL-NEXT:    store i64 [[TMP0]], ptr [[ARGMEM]], align 8
+; NODL-NEXT:    [[DOTUNPACK_UNPACK:%.*]] = load i32, ptr [[A:%.*]], align 4
+; NODL-NEXT:    [[DOTUNPACK_ELT1:%.*]] = getelementptr inbounds nuw i8, ptr 
[[A]], i64 4
+; NODL-NEXT:    [[DOTUNPACK_UNPACK2:%.*]] = load i32, ptr [[DOTUNPACK_ELT1]], 
align 4
+; NODL-NEXT:    store i32 [[DOTUNPACK_UNPACK]], ptr [[ARGMEM]], align 8
+; NODL-NEXT:    [[ARGMEM_REPACK4:%.*]] = getelementptr inbounds nuw i8, ptr 
[[ARGMEM]], i64 4
+; NODL-NEXT:    store i32 [[DOTUNPACK_UNPACK2]], ptr [[ARGMEM_REPACK4]], align 
4
 ; NODL-NEXT:    call void @test9_aux(ptr nonnull inalloca(<{ [[STRUCT_TYPE]] 
}>) [[ARGMEM]])
 ; NODL-NEXT:    ret void
 ;
@@ -251,8 +263,8 @@ entry:
 
 define void @test_inalloca_with_element_count(ptr %a) {
 ; ALL-LABEL: @test_inalloca_with_element_count(
-; ALL-NEXT:    [[ALLOCA1:%.*]] = alloca inalloca [10 x %struct_type], align 4
-; ALL-NEXT:    call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE:%.*]]) 
[[ALLOCA1]])
+; ALL-NEXT:    [[ALLOCA1:%.*]] = alloca inalloca [10 x [[STRUCT_TYPE:%.*]]], 
align 4
+; ALL-NEXT:    call void @test9_aux(ptr nonnull inalloca([[STRUCT_TYPE]]) 
[[ALLOCA1]])
 ; ALL-NEXT:    ret void
 ;
   %alloca = alloca inalloca %struct_type, i32 10, align 4
diff --git a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll 
b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
index b86066c2776e8..fb795cbbadc78 100644
--- a/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
+++ b/llvm/test/Transforms/InstCombine/memcpy_alloca.ll
@@ -71,4 +71,63 @@ define void @test6(ptr %dest) {
   ret void
 }
 
+; Infer the type of the generated load/store when possible from an alloca
+
+define void @test7(ptr %src, ptr %dest) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    %[[UNPACK0:.*]] = load i32, ptr %src, align 1
+; CHECK-NEXT:    %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, 
i64 4
+; CHECK-NEXT:    %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1
+; CHECK-NEXT:    store i32 %[[UNPACK0]], ptr %dest, align 1
+; CHECK-NEXT:    %[[DEST_GEP:.*]] = getelementptr inbounds nuw i8, ptr %dest, 
i64 4
+; CHECK-NEXT:    store i32 %[[UNPACK1]], ptr %[[DEST_GEP]], align 1
+; CHECK-NEXT:    ret void
+;
+  %temp = alloca [2 x i32], align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 8, i1 false)
+
+  ret void
+}
+
+; Ensure we don't use alloca type if only paritally copying
+
+define void @test8(ptr %src, ptr %dest) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    %[[LI:.*]] = load i32, ptr %src, align 1
+; CHECK-NEXT:    store i32 %[[LI]], ptr %dest, align 1
+; CHECK-NEXT:    ret void
+;
+  %temp = alloca [2 x i32], align 4
+  call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 4, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %temp, i32 4, i1 false)
+
+  ret void
+}
+
+; Ensure we don't use alloca type if they don't agree
+
+define double @test9(ptr %src, ptr %dest) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    %[[TEMP:.*]] = alloca double, align 1
+; CHECK-NEXT:    %[[UNPACK0:.*]] = load i32, ptr %src, align 1
+; CHECK-NEXT:    %[[SRC_GEP:.*]] = getelementptr inbounds nuw i8, ptr %src, 
i64 4
+; CHECK-NEXT:    %[[UNPACK1:.*]] = load i32, ptr %[[SRC_GEP]], align 1
+; CHECK-NEXT:    store i32 %[[UNPACK0]], ptr %[[TEMP]], align 1
+; CHECK-NEXT:    %[[TEMP_GEP:.*]] = getelementptr inbounds nuw i8, ptr 
%[[TEMP]], i64 4
+; CHECK-NEXT:    store i32 %[[UNPACK1]], ptr %[[TEMP_GEP]], align 1
+; CHECK-NEXT:    %[[RES:.*]] = load double, ptr %[[TEMP]]
+; CHECK-NEXT:    ret double %[[RES]]
+;
+  %temp = alloca [2 x i32], align 4
+  %out = alloca double, align 1
+
+  call void @llvm.memcpy.p0.p0.i32(ptr %temp, ptr %src, i32 8, i1 false)
+  call void @llvm.memcpy.p0.p0.i32(ptr %out, ptr %temp, i32 8, i1 false)
+
+  %res = load double, ptr %out
+
+  ret double %res
+}
+
 declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
diff --git 
a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
index f42101ffe89aa..e346ee8414052 100644
--- 
a/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
+++ 
b/llvm/test/Transforms/PhaseOrdering/X86/SROA-after-final-loop-unrolling-2.ll
@@ -28,30 +28,27 @@ define dso_local void @foo(i32 noundef %arg, ptr noundef 
nonnull align 4 derefer
 ; CHECK-NEXT:    [[ARG_OFF:%.*]] = add i32 [[ARG]], 127
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ult i32 [[ARG_OFF]], 255
 ; CHECK-NEXT:    br i1 [[TMP0]], label %[[BB12:.*]], label %[[BB13:.*]]
-; CHECK:       [[BB12_LOOPEXIT:.*]]:
-; CHECK-NEXT:    [[I3_SROA_8_0_INSERT_EXT:%.*]] = zext i32 [[I21_3:%.*]] to i64
-; CHECK-NEXT:    [[I3_SROA_8_0_INSERT_SHIFT:%.*]] = shl nuw i64 
[[I3_SROA_8_0_INSERT_EXT]], 32
-; CHECK-NEXT:    [[I3_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[I21_2:%.*]] to i64
-; CHECK-NEXT:    [[I3_SROA_0_0_INSERT_INSERT:%.*]] = or disjoint i64 
[[I3_SROA_8_0_INSERT_SHIFT]], [[I3_SROA_0_0_INSERT_EXT]]
-; CHECK-NEXT:    br label %[[BB12]]
 ; CHECK:       [[BB12]]:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i64 [ [[I3_SROA_0_0_INSERT_INSERT]], 
%[[BB12_LOOPEXIT]] ], [ 180388626456, %[[BB]] ]
-; CHECK-NEXT:    store i64 [[TMP1]], ptr [[ARG1]], align 4, !tbaa 
[[CHAR_TBAA5:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x i32> [ <i32 24, i32 42>, %[[BB]] ], [ 
[[I3_SROA_0_4_VEC_INSERT33:%.*]], %[[BB13]] ]
+; CHECK-NEXT:    store <2 x i32> [[TMP2]], ptr [[ARG1]], align 4, !tbaa 
[[CHAR_TBAA5:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ; CHECK:       [[BB13]]:
-; CHECK-NEXT:    [[I3_SROA_8_0:%.*]] = phi i32 [ [[I21_3]], %[[BB13]] ], [ 42, 
%[[BB]] ]
-; CHECK-NEXT:    [[I3_SROA_0_0:%.*]] = phi i32 [ [[I21_2]], %[[BB13]] ], [ 24, 
%[[BB]] ]
+; CHECK-NEXT:    [[I3_SROA_0_1:%.*]] = phi <2 x i32> [ 
[[I3_SROA_0_4_VEC_INSERT33]], %[[BB13]] ], [ <i32 24, i32 42>, %[[BB]] ]
 ; CHECK-NEXT:    [[I4_05:%.*]] = phi i32 [ [[I24_3:%.*]], %[[BB13]] ], [ 0, 
%[[BB]] ]
+; CHECK-NEXT:    [[I3_SROA_0_0:%.*]] = extractelement <2 x i32> 
[[I3_SROA_0_1]], i64 0
 ; CHECK-NEXT:    [[I21:%.*]] = mul nsw i32 [[I3_SROA_0_0]], [[I4_05]]
 ; CHECK-NEXT:    [[I24:%.*]] = or disjoint i32 [[I4_05]], 1
+; CHECK-NEXT:    [[I3_SROA_8_0:%.*]] = extractelement <2 x i32> 
[[I3_SROA_0_1]], i64 1
 ; CHECK-NEXT:    [[I21_1:%.*]] = mul nsw i32 [[I3_SROA_8_0]], [[I24]]
 ; CHECK-NEXT:    [[I24_1:%.*]] = or disjoint i32 [[I4_05]], 2
-; CHECK-NEXT:    [[I21_2]] = mul nsw i32 [[I21]], [[I24_1]]
+; CHECK-NEXT:    [[I21_2:%.*]] = mul nsw i32 [[I21]], [[I24_1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[I21_2]], 
i64 0
 ; CHECK-NEXT:    [[I24_2:%.*]] = or disjoint i32 [[I4_05]], 3
-; CHECK-NEXT:    [[I21_3]] = mul nsw i32 [[I21_1]], [[I24_2]]
+; CHECK-NEXT:    [[I21_3:%.*]] = mul nsw i32 [[I21_1]], [[I24_2]]
+; CHECK-NEXT:    [[I3_SROA_0_4_VEC_INSERT33]] = insertelement <2 x i32> 
[[TMP1]], i32 [[I21_3]], i64 1
 ; CHECK-NEXT:    [[I24_3]] = add nuw nsw i32 [[I4_05]], 4
 ; CHECK-NEXT:    [[I11_NOT_3:%.*]] = icmp eq i32 [[I24_3]], [[I10]]
-; CHECK-NEXT:    br i1 [[I11_NOT_3]], label %[[BB12_LOOPEXIT]], label 
%[[BB13]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[I11_NOT_3]], label %[[BB12]], label %[[BB13]], 
!llvm.loop [[LOOP8:![0-9]+]]
 ;
 bb:
   %i = alloca i32, align 4
diff --git a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll 
b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll
index 059a122262786..1f9ec9b6ac5a1 100644
--- a/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll
+++ b/llvm/test/Transforms/PhaseOrdering/swap-promotion.ll
@@ -5,10 +5,14 @@
 
 define void @swap(ptr %p1, ptr %p2) {
 ; CHECK-LABEL: @swap(
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[P1:%.*]], align 1
+; CHECK-NEXT:    [[DOTUNPACK:%.*]] = load i32, ptr [[P1:%.*]], align 1
+; CHECK-NEXT:    [[DOTELT1:%.*]] = getelementptr inbounds nuw i8, ptr [[P1]], 
i64 4
+; CHECK-NEXT:    [[DOTUNPACK2:%.*]] = load i32, ptr [[DOTELT1]], align 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[P2:%.*]], align 1
 ; CHECK-NEXT:    store i64 [[TMP2]], ptr [[P1]], align 1
-; CHECK-NEXT:    store i64 [[TMP1]], ptr [[P2]], align 1
+; CHECK-NEXT:    store i32 [[DOTUNPACK]], ptr [[P2]], align 1
+; CHECK-NEXT:    [[P2_REPACK8:%.*]] = getelementptr inbounds nuw i8, ptr 
[[P2]], i64 4
+; CHECK-NEXT:    store i32 [[DOTUNPACK2]], ptr [[P2_REPACK8]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %tmp = alloca [2 x i32]

``````````

</details>


https://github.com/llvm/llvm-project/pull/169966
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to