https://github.com/krzysz00 updated 
https://github.com/llvm/llvm-project/pull/200937

>From bfdfeffe3aa45f34c4257b739e6297146789edb6 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <[email protected]>
Date: Sat, 30 May 2026 02:18:03 +0000
Subject: [PATCH] [SelectionDAG] Fold subvector inserts into concat operands

When the input of an insert_subvector is a single-use CONCAT_VECTORS and the
inserted subvector lies wholly within one of the concat operands, push the
insert_subvector into that operand.

AI note: an LLM generated the code and the test; I have read them both.

Co-Authored-By: OpenAI Codex <[email protected]>
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 44 ++++++++++++++-----
 .../CodeGen/X86/dagcombine-insert-concat.ll   | 22 ++--------
 2 files changed, 38 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 24bd00892a49b..b451408d2025b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -29525,16 +29525,40 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) 
{
     }
   }
 
-  // If the input vector is a concatenation, and the insert replaces
-  // one of the pieces, we can optimize into a single concat_vectors.
-  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
-      N0.getOperand(0).getValueType() == N1.getValueType() &&
-      N0.getOperand(0).getValueType().isScalableVector() ==
-          N1.getValueType().isScalableVector()) {
-    unsigned Factor = N1.getValueType().getVectorMinNumElements();
-    SmallVector<SDValue, 8> Ops(N0->ops());
-    Ops[InsIdx / Factor] = N1;
-    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+  // If the input vector is a concatenation and the insert is wholly contained
+  // in one of its operands, push the insertion into that operand.
+  if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse()) {
+    EVT ConcatOpVT = N0.getOperand(0).getValueType();
+    EVT InsVT = N1.getValueType();
+    unsigned Factor = ConcatOpVT.getVectorMinNumElements();
+    unsigned ConcatOpIdx = InsIdx / Factor;
+    unsigned RelativeIdx = InsIdx - ConcatOpIdx * Factor;
+    if (ConcatOpIdx < N0.getNumOperands()) {
+      // If the insert replaces a whole concat operand, optimize into a single
+      // concat_vectors.
+      if (ConcatOpVT == InsVT &&
+          ConcatOpVT.isScalableVector() == InsVT.isScalableVector() &&
+          RelativeIdx == 0) {
+        SmallVector<SDValue, 8> Ops(N0->ops());
+        Ops[ConcatOpIdx] = N1;
+        return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+      }
+
+      if (VT.isFixedLengthVector() && ConcatOpVT.isFixedLengthVector() &&
+          InsVT.isFixedLengthVector() &&
+          ConcatOpVT.getVectorElementType() == InsVT.getVectorElementType() &&
+          hasOperation(ISD::INSERT_SUBVECTOR, ConcatOpVT)) {
+        unsigned NumConcatOpElts = ConcatOpVT.getVectorNumElements();
+        unsigned NumInsElts = InsVT.getVectorNumElements();
+        if (RelativeIdx % NumInsElts == 0 &&
+            RelativeIdx + NumInsElts <= NumConcatOpElts) {
+          SmallVector<SDValue, 8> Ops(N0->ops());
+          Ops[ConcatOpIdx] = DAG.getInsertSubvector(SDLoc(N), Ops[ConcatOpIdx],
+                                                    N1, RelativeIdx);
+          return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+        }
+      }
+    }
   }
 
   // Simplify source operands based on insertion.
diff --git a/llvm/test/CodeGen/X86/dagcombine-insert-concat.ll 
b/llvm/test/CodeGen/X86/dagcombine-insert-concat.ll
index a143b85ec74d6..c07fea48e2f8c 100644
--- a/llvm/test/CodeGen/X86/dagcombine-insert-concat.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-insert-concat.ll
@@ -10,25 +10,11 @@ define <32 x i16> @insert_concat_select(i1 %cond, <16 x 
i16> %a, <16 x i16> %b,
 ; CHECK-NEXT:    testb $1, %dil
 ; CHECK-NEXT:    je .LBB0_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3
-; CHECK-NEXT:    vmovd %xmm2, %eax
-; CHECK-NEXT:    movl $65536, %ecx # imm = 0x10000
-; CHECK-NEXT:    kmovd %ecx, %k1
-; CHECK-NEXT:    vpbroadcastw %eax, %zmm3 {%k1}
-; CHECK-NEXT:    vpextrw $1, %xmm2, %eax
-; CHECK-NEXT:    movl $131072, %ecx # imm = 0x20000
-; CHECK-NEXT:    kmovd %ecx, %k1
-; CHECK-NEXT:    vpbroadcastw %eax, %zmm3 {%k1}
-; CHECK-NEXT:    vpextrw $2, %xmm2, %eax
-; CHECK-NEXT:    movl $262144, %ecx # imm = 0x40000
-; CHECK-NEXT:    kmovd %ecx, %k1
-; CHECK-NEXT:    vpbroadcastw %eax, %zmm3 {%k1}
-; CHECK-NEXT:    vpextrw $3, %xmm2, %eax
-; CHECK-NEXT:    movl $524288, %ecx # imm = 0x80000
-; CHECK-NEXT:    kmovd %ecx, %k1
-; CHECK-NEXT:    vpbroadcastw %eax, %zmm3 {%k1}
+; CHECK-NEXT:    vblendps {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
+; CHECK-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3
 ; CHECK-NEXT:  .LBB0_2: # %entry
-; CHECK-NEXT:    vmovdqa64 %zmm3, %zmm0
+; CHECK-NEXT:    vmovaps %zmm3, %zmm0
 ; CHECK-NEXT:    retq
 entry:
   %wide = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> <i32 0, i32 
1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 
12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 
22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to