https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/176432
Backport 6f69d68a9ef4a4e3fae634a80f24ea5d77e1fd45 Requested by: @mshockwave >From 2a4ce5961b03345bdf8d62f36a26a9c9764fec0b Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu <[email protected]> Date: Fri, 16 Jan 2026 09:07:21 -0800 Subject: [PATCH] [RISCV] Add missing COPY elimination when folding vmerge into mask (#176077) Found in #176001 (but unfortunately unrelated to its miscompilation), the following snippet ``` early-clobber %102:vrm8 = PseudoVZEXT_VF8_M8 undef $noreg, killed undef %326, 16, 6 /* e64 */, 3 /* ta, ma */ %123:vrm8nov0 = COPY %102 %124:vmv0 = IMPLICIT_DEF %121:vrm8nov0 = PseudoVMERGE_VVM_M8 undef $noreg, killed undef %327, killed undef %123, undef %124, 16, 6 /* e64 */ %125:vrm8 = COPY killed %121 BEQ killed undef %325, $x0, %bb.8 PseudoBR %bb.7 ``` is turned into ``` %123:vrm8nov0 = COPY %121:vrm8nov0 %124:vmv0 = IMPLICIT_DEF early-clobber %121:vrm8nov0 = PseudoVZEXT_VF8_M8_MASK undef %327:vrm8nov0(tied-def 0), killed undef %326:vr, %124:vmv0, 16, 6, 1 %125:vrm8 = COPY killed %121:vrm8nov0 BEQ killed undef %325:gpr, $x0, %bb.8 PseudoBR %bb.7 ``` by RISC-V Vector Peephole's vmerge folding. This is problematic because `%121` is used before its definition. This was caused by the fact that the vector peephole tries to sink the new instruction -- in this case `PseudoVZEXT_VF8_M8_MASK` -- until it's dominated by the mask. But we forgot to sink all the COPYs of its result like `%123` as well. This patch fixes this by removing those COPYs after the folding, as all of their users should be dead at that moment. 
(cherry picked from commit 6f69d68a9ef4a4e3fae634a80f24ea5d77e1fd45) --- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp | 27 ++++++++++++---- .../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 3 +- .../CodeGen/RISCV/rvv/vmerge-peephole.mir | 32 ++++++++++++++++++- 3 files changed, 52 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index a5385be0c011c..b00244af1a875 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -73,7 +73,9 @@ class RISCVVectorPeephole : public MachineFunctionPass { bool isAllOnesMask(const MachineInstr *MaskDef) const; std::optional<unsigned> getConstant(const MachineOperand &VL) const; bool ensureDominates(const MachineOperand &Use, MachineInstr &Src) const; - Register lookThruCopies(Register Reg, bool OneUseOnly = false) const; + Register + lookThruCopies(Register Reg, bool OneUseOnly = false, + SmallVectorImpl<MachineInstr *> *Copies = nullptr) const; }; } // namespace @@ -389,8 +391,9 @@ bool RISCVVectorPeephole::convertAllOnesVMergeToVMv(MachineInstr &MI) const { // If \p Reg is defined by one or more COPYs of virtual registers, traverses // the chain and returns the root non-COPY source. 
-Register RISCVVectorPeephole::lookThruCopies(Register Reg, - bool OneUseOnly) const { +Register RISCVVectorPeephole::lookThruCopies( + Register Reg, bool OneUseOnly, + SmallVectorImpl<MachineInstr *> *Copies) const { while (MachineInstr *Def = MRI->getUniqueVRegDef(Reg)) { if (!Def->isFullCopy()) break; @@ -399,6 +402,8 @@ Register RISCVVectorPeephole::lookThruCopies(Register Reg, break; if (OneUseOnly && !MRI->hasOneNonDBGUse(Reg)) break; + if (Copies) + Copies->push_back(Def); Reg = Src; } return Reg; @@ -735,10 +740,12 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { if (RISCV::getRVVMCOpcode(MI.getOpcode()) != RISCV::VMERGE_VVM) return false; + // Collect chain of COPYs on True's result for later cleanup. + SmallVector<MachineInstr *, 4> TrueCopies; Register PassthruReg = lookThruCopies(MI.getOperand(1).getReg()); Register FalseReg = lookThruCopies(MI.getOperand(2).getReg()); - Register TrueReg = - lookThruCopies(MI.getOperand(3).getReg(), /*OneUseOnly=*/true); + Register TrueReg = lookThruCopies(MI.getOperand(3).getReg(), + /*OneUseOnly=*/true, &TrueCopies); if (!TrueReg.isVirtual() || !MRI->hasOneUse(TrueReg)) return false; MachineInstr &True = *MRI->getUniqueVRegDef(TrueReg); @@ -821,8 +828,9 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { assert(RISCVII::hasVecPolicyOp(True.getDesc().TSFlags) && "Foldable unmasked pseudo should have a policy op already"); - // Make sure the mask dominates True, otherwise move down True so it does. - // VL will always dominate since if it's a register they need to be the same. + // Make sure the mask dominates True and its copies, otherwise move down True + // so it does. VL will always dominate since if it's a register they need to + // be the same. 
if (!ensureDominates(MaskOp, True)) return false; @@ -861,6 +869,11 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { MRI->clearKillFlags(FalseReg); MI.eraseFromParent(); + // Cleanup all the COPYs on True's value. We have to manually do this because + // sometimes sinking True causes these COPY to be invalid (use before define). + for (MachineInstr *TrueCopy : TrueCopies) + TrueCopy->eraseFromParent(); + return true; } diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 5be32cc35fe37..acd9519bb5a8e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -867,9 +867,8 @@ define void @test_dag_loop() { ; CHECK-NEXT: vmseq.vv v0, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vsetivli zero, 1, e16, m8, tu, mu +; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu ; CHECK-NEXT: vle16.v v8, (zero), v0.t -; CHECK-NEXT: vsetivli zero, 0, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (zero) ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir index bc78a7732c15a..98b193f24d7c8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-peephole.mir @@ -128,7 +128,6 @@ body: | ; CHECK-NEXT: %passthru:vrnov0 = COPY $v8 ; CHECK-NEXT: %mask:vmv0 = COPY $v0 ; CHECK-NEXT: %z:vrnov0 = PseudoVLE32_V_M1_MASK %passthru, $noreg, %mask, %avl, 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size, align 1) - ; CHECK-NEXT: %y:vrnov0 = COPY %z %avl:gprnox0 = COPY $x8 %passthru:vrnov0 = COPY $v8 %x:vr = PseudoVLE32_V_M1 $noreg, $noreg, %avl, 5 /* e32 */, 2 /* tu, ma */ :: (load unknown-size) @@ -181,3 +180,34 @@ body: | %mask:vmv0 = COPY $v0 PseudoVSE8_V_M1 %copy, $noreg, %avl, 5 /* e8 */ %y:vrnov0 = PseudoVMERGE_VVM_M1 %passthru, %passthru, 
%copy, %mask, %avl, 5 /* e32 */ +... +--- +name: true_copy_elimination +body: | + ; CHECK-LABEL: name: true_copy_elimination + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vmv0 = IMPLICIT_DEF + ; CHECK-NEXT: early-clobber %5:vrm8nov0 = PseudoVZEXT_VF8_M8_MASK $noreg, $noreg, [[DEF]], 16, 6 /* e64 */, 1 /* ta, mu */ + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vrm8 = COPY %5 + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.1 + + PseudoBR %bb.1 + + bb.1: + + %102:vrm8 = PseudoVZEXT_VF8_M8 $noreg, $noreg, 16, 6 /* e64 */, 3 /* ta, ma */ + %123:vrm8nov0 = COPY %102 + %a123:vrm8 = COPY %123 + %b123:vrm8nov0 = COPY %a123 + %124:vmv0 = IMPLICIT_DEF + %121:vrm8nov0 = PseudoVMERGE_VVM_M8 $noreg, $noreg, %b123, undef %124, 16, 6 /* e64 */ + %125:vrm8 = COPY %121 + PseudoRET +... _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
