Author: nora
Date: 2026-02-06T09:22:45Z
New Revision: d38f0964df37c2d6753b8dd6560788d49d532972

URL: 
https://github.com/llvm/llvm-project/commit/d38f0964df37c2d6753b8dd6560788d49d532972
DIFF: 
https://github.com/llvm/llvm-project/commit/d38f0964df37c2d6753b8dd6560788d49d532972.diff

LOG: [VPlan] Create edge mask for single-destination switch (#179107)

When converting phis to blends, the `VPPredicator` expects to have edge
masks to the phi node if the phi node has different incoming blocks.
This was not the case if the predecessor of the phi was a switch where a
conditional destination was the same as the default destination.

This was because when creating edge masks in `createSwitchEdgeMasks`,
edge masks are set in a loop through the *non-default* destinations. But
when there are no non-default destinations (but at least one condition,
otherwise an earlier condition would trigger and just forward the source
mask), this loop is never executed, so the masks are never set.

To resolve this, we explicitly forward the source mask for these cases
as well, which is correct because it is an unconditional branch, just a
very convoluted one.

fixes #179074

(cherry picked from commit 3bbf748a63a3cb38271a478b520789be57d5e2c8)

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
    llvm/test/Transforms/LoopVectorize/predicate-switch.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index f7e7fc29bc203..7f787dcbbf3cb 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -225,6 +225,10 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction 
*SI) {
     DefaultMask = Builder.createNot(DefaultMask);
     if (SrcMask)
       DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
+  } else {
+    // There are no destinations other than the default destination, so this is
+    // an unconditional branch.
+    DefaultMask = SrcMask;
   }
   setEdgeMask(Src, DefaultDst, DefaultMask);
 }

diff  --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll 
b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 3276528e54225..acddbfd6aab81 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -508,6 +508,150 @@ define void @switch_unconditional(ptr %start) {
 ; IC2:       [[EXIT]]:
 ; IC2-NEXT:    ret void
 ;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %gep = getelementptr i32, ptr %start, i64 %iv
+  %x = load i32, ptr %gep
+  switch i32 %x, label %foo []
+
+foo:
+  br label %loop.latch
+
+loop.latch:
+  store i32 0, ptr %gep
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp eq i64 %iv.next, 100
+  br i1 %cmp, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+define void @switch_unconditional_duplicate_target(ptr %start, ptr %dest) {
+; IC1-LABEL: define void @switch_unconditional_duplicate_target(
+; IC1-SAME: ptr [[START:%.*]], ptr [[DEST:%.*]]) {
+; IC1-NEXT:  [[ENTRY:.*:]]
+; IC1-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
+; IC1:       [[VECTOR_MEMCHECK]]:
+; IC1-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST]], i64 4
+; IC1-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[START]], i64 400
+; IC1-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]]
+; IC1-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]]
+; IC1-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; IC1-NEXT:    br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]]
+; IC1:       [[VECTOR_PH]]:
+; IC1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, 
ptr [[DEST]], i64 0
+; IC1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> 
[[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC1:       [[VECTOR_BODY]]:
+; IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, 
%[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> 
[[VEC_IND]]
+; IC1-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
+; IC1-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, 
!alias.scope [[META6:![0-9]+]]
+; IC1-NEXT:    [[TMP5:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10)
+; IC1-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]]
+; IC1-NEXT:    [[PREDPHI2:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]]
+; IC1-NEXT:    [[TMP1:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 0
+; IC1-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 1
+; IC1-NEXT:    store i32 0, ptr [[TMP1]], align 4
+; IC1-NEXT:    store i32 0, ptr [[TMP2]], align 4
+; IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; IC1-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; IC1-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; IC1-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IC1:       [[MIDDLE_BLOCK]]:
+; IC1-NEXT:    br label %[[EXIT:.*]]
+; IC1:       [[SCALAR_PH]]:
+; IC1-NEXT:    br label %[[LOOP:.*]]
+; IC1:       [[LOOP]]:
+; IC1-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LATCH:.*]] ]
+; IC1-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]]
+; IC1-NEXT:    [[X:%.*]] = load i32, ptr [[GEP]], align 4
+; IC1-NEXT:    [[COND:%.*]] = icmp ult i32 [[X]], 10
+; IC1-NEXT:    br i1 [[COND]], label %[[FORWARD:.*]], label %[[LATCH]]
+; IC1:       [[FORWARD]]:
+; IC1-NEXT:    switch i32 [[X]], label %[[LATCH]] [
+; IC1-NEXT:      i32 0, label %[[LATCH]]
+; IC1-NEXT:    ]
+; IC1:       [[LATCH]]:
+; IC1-NEXT:    [[GEP_1:%.*]] = phi ptr [ [[GEP]], %[[LOOP]] ], [ [[DEST]], 
%[[FORWARD]] ], [ [[DEST]], %[[FORWARD]] ]
+; IC1-NEXT:    store i32 0, ptr [[GEP_1]], align 4
+; IC1-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; IC1-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
+; IC1-NEXT:    br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop 
[[LOOP10:![0-9]+]]
+; IC1:       [[EXIT]]:
+; IC1-NEXT:    ret void
+;
+; IC2-LABEL: define void @switch_unconditional_duplicate_target(
+; IC2-SAME: ptr [[START:%.*]], ptr [[DEST:%.*]]) {
+; IC2-NEXT:  [[ENTRY:.*:]]
+; IC2-NEXT:    br label %[[VECTOR_MEMCHECK:.*]]
+; IC2:       [[VECTOR_MEMCHECK]]:
+; IC2-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST]], i64 4
+; IC2-NEXT:    [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[START]], i64 400
+; IC2-NEXT:    [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]]
+; IC2-NEXT:    [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]]
+; IC2-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
+; IC2-NEXT:    br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label 
%[[VECTOR_PH:.*]]
+; IC2:       [[VECTOR_PH]]:
+; IC2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, 
ptr [[DEST]], i64 0
+; IC2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> 
[[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC2:       [[VECTOR_BODY]]:
+; IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, 
%[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; IC2-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> 
[[VEC_IND]]
+; IC2-NEXT:    [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
+; IC2-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> 
[[STEP_ADD]]
+; IC2-NEXT:    [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2
+; IC2-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, 
!alias.scope [[META6:![0-9]+]]
+; IC2-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4, 
!alias.scope [[META6]]
+; IC2-NEXT:    [[TMP9:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10)
+; IC2-NEXT:    [[TMP10:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD2]], splat (i32 
10)
+; IC2-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]]
+; IC2-NEXT:    [[PREDPHI2:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]]
+; IC2-NEXT:    [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 0
+; IC2-NEXT:    [[TMP3:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 1
+; IC2-NEXT:    [[PREDPHI5:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[TMP1]]
+; IC2-NEXT:    [[PREDPHI4:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> 
[[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI5]]
+; IC2-NEXT:    [[TMP4:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i32 0
+; IC2-NEXT:    [[TMP5:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i32 1
+; IC2-NEXT:    store i32 0, ptr [[TMP2]], align 4
+; IC2-NEXT:    store i32 0, ptr [[TMP3]], align 4
+; IC2-NEXT:    store i32 0, ptr [[TMP4]], align 4
+; IC2-NEXT:    store i32 0, ptr [[TMP5]], align 4
+; IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; IC2-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; IC2-NEXT:    br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label 
%[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; IC2:       [[MIDDLE_BLOCK]]:
+; IC2-NEXT:    br label %[[EXIT:.*]]
+; IC2:       [[SCALAR_PH]]:
+; IC2-NEXT:    br label %[[LOOP:.*]]
+; IC2:       [[LOOP]]:
+; IC2-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], 
%[[LATCH:.*]] ]
+; IC2-NEXT:    [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]]
+; IC2-NEXT:    [[X:%.*]] = load i32, ptr [[GEP]], align 4
+; IC2-NEXT:    [[COND:%.*]] = icmp ult i32 [[X]], 10
+; IC2-NEXT:    br i1 [[COND]], label %[[FORWARD:.*]], label %[[LATCH]]
+; IC2:       [[FORWARD]]:
+; IC2-NEXT:    switch i32 [[X]], label %[[LATCH]] [
+; IC2-NEXT:      i32 0, label %[[LATCH]]
+; IC2-NEXT:    ]
+; IC2:       [[LATCH]]:
+; IC2-NEXT:    [[GEP_1:%.*]] = phi ptr [ [[GEP]], %[[LOOP]] ], [ [[DEST]], 
%[[FORWARD]] ], [ [[DEST]], %[[FORWARD]] ]
+; IC2-NEXT:    store i32 0, ptr [[GEP_1]], align 4
+; IC2-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; IC2-NEXT:    [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100
+; IC2-NEXT:    br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop 
[[LOOP10:![0-9]+]]
+; IC2:       [[EXIT]]:
+; IC2-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -515,13 +659,15 @@ loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
   %gep = getelementptr i32, ptr %start, i64 %iv
   %x = load i32, ptr %gep
-  switch i32 %x, label %foo []
+  %cond = icmp ult i32 %x, 10
+  br i1 %cond, label %forward, label %latch
 
-foo:
-  br label %latch
+forward:
+  switch i32 %x, label %latch [ i32 0, label %latch ]
 
 latch:
-  store i32 0, ptr %gep
+  %gep.1 = phi ptr [ %gep, %loop ], [ %dest, %forward ], [ %dest, %forward ]
+  store i32 0, ptr %gep.1
   %iv.next = add i64 %iv, 1
   %cmp = icmp eq i64 %iv.next, 100
   br i1 %cmp, label %exit, label %loop
@@ -537,6 +683,11 @@ exit:
 ; IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; IC1: [[META6]] = !{[[META7:![0-9]+]]}
+; IC1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; IC1: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
+; IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
 ;.
 ; IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -544,4 +695,9 @@ exit:
 ; IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; IC2: [[META6]] = !{[[META7:![0-9]+]]}
+; IC2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]}
+; IC2: [[META8]] = distinct !{[[META8]], !"LVerDomain"}
+; IC2: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]}
+; IC2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]}
 ;.


        
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to