Author: nora Date: 2026-02-06T09:22:45Z New Revision: d38f0964df37c2d6753b8dd6560788d49d532972
URL: https://github.com/llvm/llvm-project/commit/d38f0964df37c2d6753b8dd6560788d49d532972 DIFF: https://github.com/llvm/llvm-project/commit/d38f0964df37c2d6753b8dd6560788d49d532972.diff LOG: [VPlan] Create edge mask for single-destination switch (#179107) When converting phis to blends, the `VPPredicator` expects edge masks to exist for the edges into a phi node if the phi node has different incoming blocks. This was not the case if the predecessor of the phi was a switch where a conditional destination was the same as the default destination. This was because when creating edge masks in `createSwitchEdgeMasks`, edge masks are set in a loop through the *non-default* destinations. But when there are no non-default destinations (but at least one case, otherwise an earlier check would trigger and just forward the source mask), this loop is never executed, so the masks are never set. To resolve this, we explicitly forward the source mask for these cases as well, which is correct because it is an unconditional branch, just a very convoluted one. Fixes #179074 (cherry picked from commit 3bbf748a63a3cb38271a478b520789be57d5e2c8) Added: Modified: llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp llvm/test/Transforms/LoopVectorize/predicate-switch.ll Removed: ################################################################################ diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp index f7e7fc29bc203..7f787dcbbf3cb 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp @@ -225,6 +225,10 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) { DefaultMask = Builder.createNot(DefaultMask); if (SrcMask) DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask); + } else { + // There are no destinations other than the default destination, so this is + // an unconditional branch. 
+ DefaultMask = SrcMask; } setEdgeMask(Src, DefaultDst, DefaultMask); } diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll index 3276528e54225..acddbfd6aab81 100644 --- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll +++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll @@ -508,6 +508,150 @@ define void @switch_unconditional(ptr %start) { ; IC2: [[EXIT]]: ; IC2-NEXT: ret void ; +entry: + br label %loop.header + +loop.header: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %gep = getelementptr i32, ptr %start, i64 %iv + %x = load i32, ptr %gep + switch i32 %x, label %foo [] + +foo: + br label %loop.latch + +loop.latch: + store i32 0, ptr %gep + %iv.next = add i64 %iv, 1 + %cmp = icmp eq i64 %iv.next, 100 + br i1 %cmp, label %exit, label %loop.header + +exit: + ret void +} + +define void @switch_unconditional_duplicate_target(ptr %start, ptr %dest) { +; IC1-LABEL: define void @switch_unconditional_duplicate_target( +; IC1-SAME: ptr [[START:%.*]], ptr [[DEST:%.*]]) { +; IC1-NEXT: [[ENTRY:.*:]] +; IC1-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; IC1: [[VECTOR_MEMCHECK]]: +; IC1-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST]], i64 4 +; IC1-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[START]], i64 400 +; IC1-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] +; IC1-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]] +; IC1-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; IC1-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC1: [[VECTOR_PH]]: +; IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[DEST]], i64 0 +; IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; IC1-NEXT: br label %[[VECTOR_BODY:.*]] +; IC1: [[VECTOR_BODY]]: +; IC1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ 
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC1-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[VEC_IND]] +; IC1-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 +; IC1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4, !alias.scope [[META6:![0-9]+]] +; IC1-NEXT: [[TMP5:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10) +; IC1-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] +; IC1-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP5]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]] +; IC1-NEXT: [[TMP1:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 0 +; IC1-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 1 +; IC1-NEXT: store i32 0, ptr [[TMP1]], align 4 +; IC1-NEXT: store i32 0, ptr [[TMP2]], align 4 +; IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; IC1-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; IC1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; IC1-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IC1: [[MIDDLE_BLOCK]]: +; IC1-NEXT: br label %[[EXIT:.*]] +; IC1: [[SCALAR_PH]]: +; IC1-NEXT: br label %[[LOOP:.*]] +; IC1: [[LOOP]]: +; IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; IC1-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]] +; IC1-NEXT: [[X:%.*]] = load i32, ptr [[GEP]], align 4 +; IC1-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], 10 +; IC1-NEXT: br i1 [[COND]], label %[[FORWARD:.*]], label %[[LATCH]] +; IC1: [[FORWARD]]: +; IC1-NEXT: switch i32 [[X]], label %[[LATCH]] [ +; IC1-NEXT: i32 0, label %[[LATCH]] +; IC1-NEXT: ] +; IC1: [[LATCH]]: +; IC1-NEXT: [[GEP_1:%.*]] = phi ptr [ [[GEP]], %[[LOOP]] ], [ [[DEST]], %[[FORWARD]] ], [ [[DEST]], %[[FORWARD]] ] +; 
IC1-NEXT: store i32 0, ptr [[GEP_1]], align 4 +; IC1-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IC1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 +; IC1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; IC1: [[EXIT]]: +; IC1-NEXT: ret void +; +; IC2-LABEL: define void @switch_unconditional_duplicate_target( +; IC2-SAME: ptr [[START:%.*]], ptr [[DEST:%.*]]) { +; IC2-NEXT: [[ENTRY:.*:]] +; IC2-NEXT: br label %[[VECTOR_MEMCHECK:.*]] +; IC2: [[VECTOR_MEMCHECK]]: +; IC2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST]], i64 4 +; IC2-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[START]], i64 400 +; IC2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] +; IC2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]] +; IC2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; IC2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; IC2: [[VECTOR_PH]]: +; IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[DEST]], i64 0 +; IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer +; IC2-NEXT: br label %[[VECTOR_BODY:.*]] +; IC2: [[VECTOR_BODY]]: +; IC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC2-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; IC2-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2) +; IC2-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[VEC_IND]] +; IC2-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0 +; IC2-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[STEP_ADD]] +; IC2-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[TMP7]], i64 2 +; IC2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4, !alias.scope [[META6:![0-9]+]] +; IC2-NEXT: [[WIDE_LOAD2:%.*]] = load 
<2 x i32>, ptr [[TMP8]], align 4, !alias.scope [[META6]] +; IC2-NEXT: [[TMP9:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD]], splat (i32 10) +; IC2-NEXT: [[TMP10:%.*]] = icmp ult <2 x i32> [[WIDE_LOAD2]], splat (i32 10) +; IC2-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP0]] +; IC2-NEXT: [[PREDPHI2:%.*]] = select <2 x i1> [[TMP9]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI]] +; IC2-NEXT: [[TMP2:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 0 +; IC2-NEXT: [[TMP3:%.*]] = extractelement <2 x ptr> [[PREDPHI2]], i32 1 +; IC2-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP1]] +; IC2-NEXT: [[PREDPHI4:%.*]] = select <2 x i1> [[TMP10]], <2 x ptr> [[BROADCAST_SPLAT]], <2 x ptr> [[PREDPHI5]] +; IC2-NEXT: [[TMP4:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i32 0 +; IC2-NEXT: [[TMP5:%.*]] = extractelement <2 x ptr> [[PREDPHI4]], i32 1 +; IC2-NEXT: store i32 0, ptr [[TMP2]], align 4 +; IC2-NEXT: store i32 0, ptr [[TMP3]], align 4 +; IC2-NEXT: store i32 0, ptr [[TMP4]], align 4 +; IC2-NEXT: store i32 0, ptr [[TMP5]], align 4 +; IC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; IC2-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2) +; IC2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 +; IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; IC2: [[MIDDLE_BLOCK]]: +; IC2-NEXT: br label %[[EXIT:.*]] +; IC2: [[SCALAR_PH]]: +; IC2-NEXT: br label %[[LOOP:.*]] +; IC2: [[LOOP]]: +; IC2-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; IC2-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[START]], i64 [[IV]] +; IC2-NEXT: [[X:%.*]] = load i32, ptr [[GEP]], align 4 +; IC2-NEXT: [[COND:%.*]] = icmp ult i32 [[X]], 10 +; IC2-NEXT: br i1 [[COND]], label %[[FORWARD:.*]], label %[[LATCH]] +; IC2: [[FORWARD]]: +; IC2-NEXT: switch i32 [[X]], label %[[LATCH]] [ +; IC2-NEXT: 
i32 0, label %[[LATCH]] +; IC2-NEXT: ] +; IC2: [[LATCH]]: +; IC2-NEXT: [[GEP_1:%.*]] = phi ptr [ [[GEP]], %[[LOOP]] ], [ [[DEST]], %[[FORWARD]] ], [ [[DEST]], %[[FORWARD]] ] +; IC2-NEXT: store i32 0, ptr [[GEP_1]], align 4 +; IC2-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; IC2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 100 +; IC2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; IC2: [[EXIT]]: +; IC2-NEXT: ret void +; entry: br label %loop @@ -515,13 +659,15 @@ loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] %gep = getelementptr i32, ptr %start, i64 %iv %x = load i32, ptr %gep - switch i32 %x, label %foo [] + %cond = icmp ult i32 %x, 10 + br i1 %cond, label %forward, label %latch -foo: - br label %latch +forward: + switch i32 %x, label %latch [ i32 0, label %latch ] latch: - store i32 0, ptr %gep + %gep.1 = phi ptr [ %gep, %loop ], [ %dest, %forward ], [ %dest, %forward ] + store i32 0, ptr %gep.1 %iv.next = add i64 %iv, 1 %cmp = icmp eq i64 %iv.next, 100 br i1 %cmp, label %exit, label %loop @@ -537,6 +683,11 @@ exit: ; IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ; IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} +; IC1: [[META6]] = !{[[META7:![0-9]+]]} +; IC1: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; IC1: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; IC1: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} +; IC1: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} ;. 
; IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} ; IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} @@ -544,4 +695,9 @@ exit: ; IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]} ; IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]} ; IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} +; IC2: [[META6]] = !{[[META7:![0-9]+]]} +; IC2: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]]} +; IC2: [[META8]] = distinct !{[[META8]], !"LVerDomain"} +; IC2: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]], [[META2]]} +; IC2: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} ;. _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
