Author: Simon Pilgrim
Date: 2026-05-10T13:50:19+01:00
New Revision: af1d9cd89325b260aca5ca6b5556358bc3293458

URL: https://github.com/llvm/llvm-project/commit/af1d9cd89325b260aca5ca6b5556358bc3293458
DIFF: https://github.com/llvm/llvm-project/commit/af1d9cd89325b260aca5ca6b5556358bc3293458.diff

LOG: Revert "[VectorCombine] foldShuffleChainsToReduce - add support for partial v…"

This reverts commit a2942d472aac907af6f47f8c7658288609b6e1de.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smax.ll
    llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smin.ll
    llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umax.ll
    llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umin.ll
    llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp 
b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 10ad3a71c73de..5ba344ea9a808 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3995,8 +3995,6 @@ bool VectorCombine::foldShuffleChainsToReduce(Instruction 
&I) {
 
   InstWorklist.push(VecOpEE);
 
-  bool IsPartialReduction = false;
-
   while (!InstWorklist.empty()) {
     Value *CI = InstWorklist.front();
     InstWorklist.pop();
@@ -4127,19 +4125,12 @@ bool 
VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
 
       ShouldBeCallOrBinInst ^= 1;
     } else {
-      // Check if this is a partial reduction - the chain ended because
-      // the source vector is not a recognized op/shuffle.
-      if (ShouldBeCallOrBinInst && VisitedCnt >= 1 && CI == PrevVecV[0]) {
-        IsPartialReduction = true;
-        break;
-      }
       return false;
     }
   }
 
-  // Full reduction pattern should end with a shuffle op.
-  // Partial reduction ends when the source vector is reached.
-  if (ShouldBeCallOrBinInst && !IsPartialReduction)
+  // Pattern should end with a shuffle op.
+  if (ShouldBeCallOrBinInst)
     return false;
 
   assert(VecSize != -1 && "Expected Match for Vector Size");
@@ -4156,32 +4147,14 @@ bool 
VectorCombine::foldShuffleChainsToReduce(Instruction &I) {
   if (!ReducedOp)
     return false;
 
-  InstructionCost NewCost = 0;
-  FixedVectorType *ReduceVecTy = FinalVecVTy;
-  SmallVector<int> ExtractMask;
-
-  if (IsPartialReduction) {
-    unsigned SubVecSize = ShuffleMaskHalf;
-    ReduceVecTy = FixedVectorType::get(FVT->getElementType(), SubVecSize);
-    ExtractMask.resize(SubVecSize);
-    std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
-    NewCost +=
-        TTI.getShuffleCost(TargetTransformInfo::SK_ExtractSubvector,
-                           ReduceVecTy, FinalVecVTy, ExtractMask, CostKind, 0);
-  }
-
-  IntrinsicCostAttributes ICA(ReducedOp, ReduceVecTy, {ReduceVecTy});
-  NewCost += TTI.getIntrinsicInstrCost(ICA, CostKind);
+  IntrinsicCostAttributes ICA(ReducedOp, FinalVecVTy, {FinalVecV});
+  InstructionCost NewCost = TTI.getIntrinsicInstrCost(ICA, CostKind);
 
   if (NewCost >= OrigCost)
     return false;
 
-  Value *ReduceInput = FinalVecV;
-  if (IsPartialReduction)
-    ReduceInput = Builder.CreateShuffleVector(FinalVecV, ExtractMask);
-
-  auto *ReducedResult = Builder.CreateIntrinsic(
-      ReducedOp, {ReduceInput->getType()}, {ReduceInput});
+  auto *ReducedResult =
+      Builder.CreateIntrinsic(ReducedOp, {FinalVecV->getType()}, {FinalVecV});
   replaceValue(I, *ReducedResult);
 
   return true;

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smax.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smax.ll
index ec8cd82b96a37..85186dba0891f 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smax.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smax.ll
@@ -314,8 +314,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 
 define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v16i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.smax.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x 
i16> [[A0]], <16 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> 
poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x 
i16> [[TMP2]], <16 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> 
poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <16 x i16> @llvm.smax.v16i16(<16 x 
i16> [[TMP4]], <16 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -333,8 +338,13 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 
 define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.smax.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.smax.v32i16(<32 x 
i16> [[A0]], <32 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i16> @llvm.smax.v32i16(<32 x 
i16> [[TMP2]], <32 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i16> @llvm.smax.v32i16(<32 x 
i16> [[TMP4]], <32 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <32 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef>
@@ -352,8 +362,15 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 
 define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.smax.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.smax.v32i8(<32 x i8> 
[[A0]], <32 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> 
poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i8> @llvm.smax.v32i8(<32 x i8> 
[[TMP2]], <32 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i8> @llvm.smax.v32i8(<32 x i8> 
[[TMP4]], <32 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <32 x i8> @llvm.smax.v32i8(<32 x i8> 
[[TMP6]], <32 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <32 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef>
@@ -374,8 +391,15 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 
 define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v64i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.smax.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.smax.v64i8(<64 x i8> 
[[A0]], <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> 
poison, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <64 x i8> @llvm.smax.v64i8(<64 x i8> 
[[TMP2]], <64 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP4]], <64 x i8> 
poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <64 x i8> @llvm.smax.v64i8(<64 x i8> 
[[TMP4]], <64 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <64 x i8> [[TMP6]], <64 x i8> 
poison, <64 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <64 x i8> @llvm.smax.v64i8(<64 x i8> 
[[TMP6]], <64 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <64 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smin.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smin.ll
index 650947d240ace..80c2929b5d5cf 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smin.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-smin.ll
@@ -314,8 +314,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 
 define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v16i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x 
i16> [[A0]], <16 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> 
poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x 
i16> [[TMP2]], <16 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> 
poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <16 x i16> @llvm.smin.v16i16(<16 x 
i16> [[TMP4]], <16 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -333,8 +338,13 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 
 define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.smin.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.smin.v32i16(<32 x 
i16> [[A0]], <32 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i16> @llvm.smin.v32i16(<32 x 
i16> [[TMP2]], <32 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i16> @llvm.smin.v32i16(<32 x 
i16> [[TMP4]], <32 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <32 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef>
@@ -352,8 +362,15 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 
 define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.smin.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.smin.v32i8(<32 x i8> 
[[A0]], <32 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> 
poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i8> @llvm.smin.v32i8(<32 x i8> 
[[TMP2]], <32 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i8> @llvm.smin.v32i8(<32 x i8> 
[[TMP4]], <32 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <32 x i8> @llvm.smin.v32i8(<32 x i8> 
[[TMP6]], <32 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <32 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef>
@@ -374,8 +391,15 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 
 define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v64i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.smin.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.smin.v64i8(<64 x i8> 
[[A0]], <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> 
poison, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <64 x i8> @llvm.smin.v64i8(<64 x i8> 
[[TMP2]], <64 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP4]], <64 x i8> 
poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <64 x i8> @llvm.smin.v64i8(<64 x i8> 
[[TMP4]], <64 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <64 x i8> [[TMP6]], <64 x i8> 
poison, <64 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <64 x i8> @llvm.smin.v64i8(<64 x i8> 
[[TMP6]], <64 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <64 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umax.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umax.ll
index f7d5a99bc0da0..dbb448c4b96e5 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umax.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umax.ll
@@ -314,8 +314,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 
 define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v16i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.umax.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.umax.v16i16(<16 x 
i16> [[A0]], <16 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> 
poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <16 x i16> @llvm.umax.v16i16(<16 x 
i16> [[TMP2]], <16 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> 
poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <16 x i16> @llvm.umax.v16i16(<16 x 
i16> [[TMP4]], <16 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -333,8 +338,13 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 
 define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.umax.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.umax.v32i16(<32 x 
i16> [[A0]], <32 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i16> @llvm.umax.v32i16(<32 x 
i16> [[TMP2]], <32 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i16> @llvm.umax.v32i16(<32 x 
i16> [[TMP4]], <32 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <32 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef>
@@ -352,8 +362,15 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 
 define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.umax.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.umax.v32i8(<32 x i8> 
[[A0]], <32 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> 
poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i8> @llvm.umax.v32i8(<32 x i8> 
[[TMP2]], <32 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i8> @llvm.umax.v32i8(<32 x i8> 
[[TMP4]], <32 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <32 x i8> @llvm.umax.v32i8(<32 x i8> 
[[TMP6]], <32 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <32 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef>
@@ -374,8 +391,15 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 
 define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v64i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.umax.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.umax.v64i8(<64 x i8> 
[[A0]], <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> 
poison, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <64 x i8> @llvm.umax.v64i8(<64 x i8> 
[[TMP2]], <64 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP4]], <64 x i8> 
poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <64 x i8> @llvm.umax.v64i8(<64 x i8> 
[[TMP4]], <64 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <64 x i8> [[TMP6]], <64 x i8> 
poison, <64 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <64 x i8> @llvm.umax.v64i8(<64 x i8> 
[[TMP6]], <64 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <64 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umin.ll 
b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umin.ll
index e2fc523dd271c..bd2366d49a951 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umin.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/horizontal-reduce-umin.ll
@@ -314,8 +314,13 @@ define i8 @test_reduce_v64i8(<64 x i8> %a0) {
 
 define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v16i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.umin.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x 
i16> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.umin.v16i16(<16 x 
i16> [[A0]], <16 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> 
poison, <16 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <16 x i16> @llvm.umin.v16i16(<16 x 
i16> [[TMP2]], <16 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[TMP4]], <16 x i16> 
poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <16 x i16> @llvm.umin.v16i16(<16 x 
i16> [[TMP4]], <16 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <16 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
@@ -333,8 +338,13 @@ define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
 
 define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i16_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = tail call i16 
@llvm.vector.reduce.umin.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x 
i16> poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.umin.v32i16(<32 x 
i16> [[A0]], <32 x i16> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i16> @llvm.umin.v32i16(<32 x 
i16> [[TMP2]], <32 x i16> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i16> [[TMP4]], <32 x i16> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i16> @llvm.umin.v32i16(<32 x 
i16> [[TMP4]], <32 x i16> [[TMP5]])
+; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <32 x i16> [[TMP6]], i64 0
 ; CHECK-NEXT:    ret i16 [[TMP10]]
 ;
   %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef>
@@ -352,8 +362,15 @@ define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
 
 define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v32i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> 
poison, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[A0]], <32 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> 
poison, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[TMP2]], <32 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <32 x i8> [[TMP4]], <32 x i8> 
poison, <32 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[TMP4]], <32 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <32 x i8> [[TMP6]], <32 x i8> 
poison, <32 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> 
[[TMP6]], <32 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <32 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef>
@@ -374,8 +391,15 @@ define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
 
 define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
 ; CHECK-LABEL: @test_reduce_v64i8_v16i8(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 
8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    [[TMP13:%.*]] = tail call i8 
@llvm.vector.reduce.umin.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> 
poison, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 
15, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> 
[[A0]], <64 x i8> [[TMP1]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> 
poison, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> 
[[TMP2]], <64 x i8> [[TMP3]])
+; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <64 x i8> [[TMP4]], <64 x i8> 
poison, <64 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> 
[[TMP4]], <64 x i8> [[TMP5]])
+; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <64 x i8> [[TMP6]], <64 x i8> 
poison, <64 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
+; CHECK-NEXT:    [[TMP8:%.*]] = tail call <64 x i8> @llvm.umin.v64i8(<64 x i8> 
[[TMP6]], <64 x i8> [[TMP7]])
+; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <64 x i8> [[TMP8]], i64 0
 ; CHECK-NEXT:    ret i8 [[TMP13]]
 ;
   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 
9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 
undef, i32 undef, i32 undef, i32 undef, i32 undef>

diff  --git 
a/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll 
b/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll
index 71809534016d1..403ce33b5344e 100644
--- a/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll
+++ b/llvm/test/Transforms/VectorCombine/fold-shuffle-chains-to-reduce.ll
@@ -193,53 +193,3 @@ define i16 @test_reduce_v6i16_xor_neg(<6 x i16> %a0) {
   %7 = extractelement <6 x i16> %6, i64 0
   ret i16 %7
 }
-
-; Partial reduction: reduce lower 8 elements of a 16-element vector using smax.
-define i16 @test_partial_reduce_v16i16_v8i16_smax(<16 x i16> %a0) {
-; CHECK-LABEL: define i16 @test_partial_reduce_v16i16_v8i16_smax(
-; CHECK-SAME: <16 x i16> [[A0:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> 
poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.smax.v8i16(<8 x 
i16> [[TMP1]])
-; CHECK-NEXT:    ret i16 [[TMP2]]
-;
-  %1 = shufflevector <16 x i16> %a0, <16 x i16> poison, <16 x i32> <i32 4, i32 
5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison>
-  %2 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %a0, <16 x i16> %1)
-  %3 = shufflevector <16 x i16> %2, <16 x i16> poison, <16 x i32> <i32 2, i32 
3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison>
-  %4 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %2, <16 x i16> %3)
-  %5 = shufflevector <16 x i16> %4, <16 x i16> poison, <16 x i32> <i32 1, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison>
-  %6 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %4, <16 x i16> %5)
-  %7 = extractelement <16 x i16> %6, i64 0
-  ret i16 %7
-}
-
-; Partial reduction: reduce lower 4 elements of an 8-element vector using add.
-define i32 @test_partial_reduce_v8i32_v4i32_add(<8 x i32> %a0) {
-; CHECK-LABEL: define i32 @test_partial_reduce_v8i32_v4i32_add(
-; CHECK-SAME: <8 x i32> [[A0:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0]], <8 x i32> 
poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x 
i32> [[TMP1]])
-; CHECK-NEXT:    ret i32 [[TMP2]]
-;
-  %1 = shufflevector <8 x i32> %a0, <8 x i32> poison, <8 x i32> <i32 2, i32 3, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-  %2 = add <8 x i32> %a0, %1
-  %3 = shufflevector <8 x i32> %2, <8 x i32> poison, <8 x i32> <i32 1, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-  %4 = add <8 x i32> %2, %3
-  %5 = extractelement <8 x i32> %4, i64 0
-  ret i32 %5
-}
-
-; Partial reduction: reduce lower 4 elements of a 16-element vector using umin.
-define i16 @test_partial_reduce_v16i16_v4i16_umin(<16 x i16> %a0) {
-; CHECK-LABEL: define i16 @test_partial_reduce_v16i16_v4i16_umin(
-; CHECK-SAME: <16 x i16> [[A0:%.*]]) {
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0]], <16 x i16> 
poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x 
i16> [[TMP1]])
-; CHECK-NEXT:    ret i16 [[TMP2]]
-;
-  %1 = shufflevector <16 x i16> %a0, <16 x i16> poison, <16 x i32> <i32 2, i32 
3, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison>
-  %2 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %a0, <16 x i16> %1)
-  %3 = shufflevector <16 x i16> %2, <16 x i16> poison, <16 x i32> <i32 1, i32 
poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, 
i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 
poison, i32 poison>
-  %4 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %2, <16 x i16> %3)
-  %5 = extractelement <16 x i16> %4, i64 0
-  ret i16 %5
-}


        
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to