https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/95924
>From 5ac4ff3040f8a5a6cc68efffe3349ef9d181ddec Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Tue, 18 Jun 2024 21:33:25 +0800 Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?= =?UTF-8?q?itial=20version?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-beta.1 --- llvm/include/llvm/CodeGen/MachineScheduler.h | 43 ++++++++-- llvm/lib/CodeGen/MachineScheduler.cpp | 34 +------- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + .../Target/RISCV/RISCVMachineScheduler.cpp | 83 +++++++++++++++++++ llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 42 ++++++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8 +- llvm/test/CodeGen/RISCV/rvv/schedule.ll | 49 +++++++++++ 7 files changed, 215 insertions(+), 45 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.h create mode 100644 llvm/test/CodeGen/RISCV/rvv/schedule.ll diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index b15abf040058e..d1b5b83e5300b 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -1349,14 +1349,6 @@ class PostGenericScheduler : public GenericSchedulerBase { void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand); }; -/// Create the standard converging machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -/// Adds default DAG mutations. -ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C); - -/// Create a generic scheduler with no vreg liveness or DAG mutation passes. -ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C); - /// If ReorderWhileClustering is set to true, no attempt will be made to /// reduce reordering due to store clustering. 
std::unique_ptr<ScheduleDAGMutation> @@ -1375,6 +1367,41 @@ std::unique_ptr<ScheduleDAGMutation> createCopyConstrainDAGMutation(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI); +/// Create the standard converging machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +/// Adds default DAG mutations. +template <typename Strategy = GenericScheduler> +ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C) { + ScheduleDAGMILive *DAG = + new ScheduleDAGMILive(C, std::make_unique<Strategy>(C)); + // Register DAG post-processors. + // + // FIXME: extend the mutation API to allow earlier mutations to instantiate + // data and pass it to later mutations. Have a single mutation that gathers + // the interesting nodes in one pass. + DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); + + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. + const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); + return DAG; +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +template <typename Strategy = PostGenericScheduler> +ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C), + /*RemoveKillFlags=*/true); + const TargetSubtargetInfo &STI = C->MF->getSubtarget(); + // Add MacroFusion mutation if fusions are not empty. 
+ const auto &MacroFusions = STI.getMacroFusions(); + if (!MacroFusions.empty()) + DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); + return DAG; +} + } // end namespace llvm #endif // LLVM_CODEGEN_MACHINESCHEDULER_H diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index cf72f74380835..ac792ad4d5484 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2701,7 +2701,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: - assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); break; case 1: if (ReadyCycle > NextCycle) { @@ -3847,26 +3847,6 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } } -/// Create the standard converging machine scheduler. This will be used as the -/// default scheduler if the target does not set a default. -ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) { - ScheduleDAGMILive *DAG = - new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)); - // Register DAG post-processors. - // - // FIXME: extend the mutation API to allow earlier mutations to instantiate - // data and pass it to later mutations. Have a single mutation that gathers - // the interesting nodes in one pass. - DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); - - const TargetSubtargetInfo &STI = C->MF->getSubtarget(); - // Add MacroFusion mutation if fusions are not empty. 
- const auto &MacroFusions = STI.getMacroFusions(); - if (!MacroFusions.empty()) - DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); - return DAG; -} - static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { return createGenericSchedLive(C); } @@ -4139,18 +4119,6 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { } } -ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) { - ScheduleDAGMI *DAG = - new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C), - /*RemoveKillFlags=*/true); - const TargetSubtargetInfo &STI = C->MF->getSubtarget(); - // Add MacroFusion mutation if fusions are not empty. - const auto &MacroFusions = STI.getMacroFusions(); - if (!MacroFusions.empty()) - DAG->addMutation(createMacroFusionDAGMutation(MacroFusions)); - return DAG; -} - //===----------------------------------------------------------------------===// // ILP Scheduler. Currently for experimental analysis of heuristics. //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 8715403f3839a..fe3f213b253f7 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen RISCVISelDAGToDAG.cpp RISCVISelLowering.cpp RISCVMachineFunctionInfo.cpp + RISCVMachineScheduler.cpp RISCVMergeBaseOffset.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp new file mode 100644 index 0000000000000..d993d840c3d3a --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -0,0 +1,83 @@ +//===- RISCVMachineScheduler.cpp - MI Scheduler for RISC-V ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RISCVMachineScheduler.h" +#include "MCTargetDesc/RISCVBaseInfo.h" +#include "MCTargetDesc/RISCVMCTargetDesc.h" +#include "RISCVInstrInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/Support/Debug.h" +#include "llvm/TargetParser/RISCVTargetParser.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-prera-sched-strategy" + +static cl::opt<bool> EnableScheduleSameVType( + "riscv-enable-schedule-same-vtype", cl::init(false), cl::Hidden, + cl::desc("Enable scheduling RVV instructions with same vtype first")); + +SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) { + if (EnableScheduleSameVType) { + for (SUnit *SU : Bot.Available) { + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { + Bot.removeReady(SU); + IsTopNode = true; + return SU; + } + } + } + for (SUnit *SU : Bot.Pending) { + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { + Bot.removeReady(SU); + IsTopNode = false; + return SU; + } + } + } + } + return GenericScheduler::pickNode(IsTopNode); +} + +bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary *Zone) const { + bool OriginalResult = 
GenericScheduler::tryCandidate(Cand, TryCand, Zone); + + return OriginalResult; +} + +void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { + GenericScheduler::schedNode(SU, IsTopNode); + MachineInstr *MI = SU->getInstr(); + const MCInstrDesc &Desc = MI->getDesc(); + if (RISCVII::hasSEWOp(Desc.TSFlags)) { + PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); + PrevVLMUL = RISCVII::getLMul(Desc.TSFlags); + } + LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; + dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump();); + LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; + auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); + dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") + << LMUL.first << "\n";); +} diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h new file mode 100644 index 0000000000000..bd806cef57dcb --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h @@ -0,0 +1,42 @@ +//===--- RISCVMachineScheduler.h - Custom RISC-V MI scheduler ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Custom RISC-V MI scheduler. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H +#define LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H + +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/TargetParser/RISCVTargetParser.h" + +namespace llvm { + +/// A GenericScheduler implementation for RISCV pre RA scheduling. 
+class RISCVPreRAMachineSchedStrategy : public GenericScheduler { +private: + RISCVII::VLMUL PrevVLMUL; + unsigned PrevVSEW; + +public: + RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C) + : GenericScheduler(C) {} + +protected: + SUnit *pickNode(bool &IsTopNode) override; + + bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, + SchedBoundary *Zone) const override; + + void schedNode(SUnit *SU, bool IsTopNode) override; +}; + +} // end namespace llvm + +#endif diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index 35d0b3408d09f..e0dcbbddc3f53 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -14,6 +14,7 @@ #include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h" #include "RISCVMachineFunctionInfo.h" +#include "RISCVMachineScheduler.h" #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" @@ -340,12 +341,11 @@ class RISCVPassConfig : public TargetPassConfig { ScheduleDAGInstrs * createMachineScheduler(MachineSchedContext *C) const override { - ScheduleDAGMILive *DAG = nullptr; - if (EnableMISchedLoadClustering) { - DAG = createGenericSchedLive(C); + ScheduleDAGMILive *DAG = + createGenericSchedLive<RISCVPreRAMachineSchedStrategy>(C); + if (EnableMISchedLoadClustering) DAG->addMutation(createLoadClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); - } return DAG; } diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll new file mode 100644 index 0000000000000..baf15ef400df5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=DEFAULT +; RUN: llc -mtriple=riscv64 
-mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST + +define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { +; DEFAULT-LABEL: test: +; DEFAULT: # %bb.0: # %entry +; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vdiv.vv v12, v8, v9 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vdiv.vv v13, v10, v11 +; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vadd.vv v8, v8, v9 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vadd.vv v9, v10, v11 +; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; DEFAULT-NEXT: vadd.vv v8, v8, v12 +; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: vadd.vv v9, v9, v13 +; DEFAULT-NEXT: vwadd.wv v8, v8, v9 +; DEFAULT-NEXT: ret +; +; SAME-VTYPE-FIRST-LABEL: test: +; SAME-VTYPE-FIRST: # %bb.0: # %entry +; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9 +; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8 +; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11 +; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10 +; SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-NEXT: ret +entry: + %0 = add <vscale x 1 x i64> %v64_0, %v64_1 + %1 = add <vscale x 1 x i32> %v32_0, %v32_1 + %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1 + %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1 + %4 = add <vscale x 1 x i64> %0, %2 + %5 = add <vscale x 1 x i32> %1, %3 + + %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64> + %7 = add <vscale x 1 x i64> %4, %6 + ret <vscale x 1 x i64> %7 +} + >From 185e0f8266c2dec9a161328c6c14490fe3cffa69 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: 
Wed, 19 Jun 2024 18:45:50 +0800 Subject: [PATCH 2/8] Support buttomup/topdown/bidirectional and fix some failures Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVMachineScheduler.cpp | 79 ++++++++--- llvm/test/CodeGen/RISCV/rvv/schedule.ll | 125 +++++++++++++++--- 2 files changed, 165 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index d993d840c3d3a..530d4f6b2d845 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -27,34 +27,68 @@ static cl::opt<bool> EnableScheduleSameVType( cl::desc("Enable scheduling RVV instructions with same vtype first")); SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) { - if (EnableScheduleSameVType) { - for (SUnit *SU : Bot.Available) { + auto FindPotentialRVVInstructionInQueue = + [&](SchedBoundary &Boundary, ReadyQueue Q, bool ShouldBeTop) -> SUnit * { + for (SUnit *SU : Q) { + if (SU->isScheduled) + continue; + MachineInstr *MI = SU->getInstr(); const MCInstrDesc &Desc = MI->getDesc(); if (RISCVII::hasSEWOp(Desc.TSFlags)) { unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); + // FIXME: We should consider vl and policy here. 
if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { - Bot.removeReady(SU); - IsTopNode = true; + IsTopNode = ShouldBeTop; + // Boundary.removeReady(SU); + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " + << *SU->getInstr()); return SU; } } } - for (SUnit *SU : Bot.Pending) { - MachineInstr *MI = SU->getInstr(); - const MCInstrDesc &Desc = MI->getDesc(); - if (RISCVII::hasSEWOp(Desc.TSFlags)) { - unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); - RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags); - if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) { - Bot.removeReady(SU); - IsTopNode = false; - return SU; - } - } + return nullptr; + }; + + auto FindPotentialRVVInstruction = [&](SchedBoundary &Boundary, + bool ShouldBeTop) -> SUnit * { + if (SUnit *Available = FindPotentialRVVInstructionInQueue( + Boundary, Boundary.Available, ShouldBeTop)) + return Available; + if (SUnit *Pending = FindPotentialRVVInstructionInQueue( + Boundary, Boundary.Pending, ShouldBeTop)) + return Pending; + return nullptr; + }; + + if (EnableScheduleSameVType) { + if (RegionPolicy.OnlyBottomUp) { + if (SUnit *SU = FindPotentialRVVInstruction(Bot, false)) + return SU; + } else if (RegionPolicy.OnlyTopDown) { + if (SUnit *SU = FindPotentialRVVInstruction(Top, true)) + return SU; + } else { + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Bot, Bot.Available, false)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Top, Top.Available, true)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Bot, Bot.Pending, false)) + return SU; + if (SUnit *SU = + FindPotentialRVVInstructionInQueue(Top, Top.Pending, true)) + return SU; } } + return GenericScheduler::pickNode(IsTopNode); } @@ -73,11 +107,12 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { if (RISCVII::hasSEWOp(Desc.TSFlags)) { PrevVSEW = 
MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm(); PrevVLMUL = RISCVII::getLMul(Desc.TSFlags); + LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; + dbgs() << "SU(" << SU->NodeNum << ") - "; + SU->getInstr()->dump();); + LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; + auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); + dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") + << LMUL.first << "\n";); } - LLVM_DEBUG(dbgs() << "Previous scheduled Unit: "; - dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump();); - LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n"; - auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL); - dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "") - << LMUL.first << "\n";); } diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll index baf15ef400df5..6b466d802ac4a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/schedule.ll +++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll @@ -1,15 +1,33 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=DEFAULT -; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ -; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=true -misched-topdown=false \ +; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BOTTOMUP +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=true \ +; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-TOPDOWN +; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=false \ +; RUN: -riscv-enable-schedule-same-vtype 
-verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BIDIRECTIONAL -define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { +declare void @consume(i64 %scalar, <vscale x 1 x i64> %vector) + +define void @test(i64 %a, i64 %b, <vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) { ; DEFAULT-LABEL: test: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; DEFAULT-NEXT: addi sp, sp, -16 +; DEFAULT-NEXT: .cfi_def_cfa_offset 16 +; DEFAULT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; DEFAULT-NEXT: .cfi_offset ra, -8 +; DEFAULT-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; DEFAULT-NEXT: vdiv.vv v12, v8, v9 ; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; DEFAULT-NEXT: div a2, a0, a1 +; DEFAULT-NEXT: add a3, a0, a1 +; DEFAULT-NEXT: mul a0, a0, a1 +; DEFAULT-NEXT: add a0, a0, a3 +; DEFAULT-NEXT: add a0, a0, a2 ; DEFAULT-NEXT: vdiv.vv v13, v10, v11 ; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; DEFAULT-NEXT: vadd.vv v8, v8, v9 @@ -20,30 +38,103 @@ define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v ; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; DEFAULT-NEXT: vadd.vv v9, v9, v13 ; DEFAULT-NEXT: vwadd.wv v8, v8, v9 +; DEFAULT-NEXT: call consume +; DEFAULT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; DEFAULT-NEXT: addi sp, sp, 16 ; DEFAULT-NEXT: ret ; -; SAME-VTYPE-FIRST-LABEL: test: -; SAME-VTYPE-FIRST: # %bb.0: # %entry -; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9 -; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9 -; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8 -; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11 -; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11 -; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10 -; 
SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9 -; SAME-VTYPE-FIRST-NEXT: ret +; SAME-VTYPE-FIRST-BOTTOMUP-LABEL: test: +; SAME-VTYPE-FIRST-BOTTOMUP: # %bb.0: # %entry +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli a2, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v12, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: div a2, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a3, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: mul a0, a0, a1 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a3 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v8, v12, v8 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v10, v11 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v9, v10 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: call consume +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ret +; +; SAME-VTYPE-FIRST-TOPDOWN-LABEL: test: +; SAME-VTYPE-FIRST-TOPDOWN: # %bb.0: # %entry +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli a3, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v12, v10, v11 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a0, a1 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: mul a3, a0, a1 +; 
SAME-VTYPE-FIRST-TOPDOWN-NEXT: div a0, a0, a1 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a2, a3 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v10, v12, v10 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v11, v8, v9 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v8, v11, v8 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vwadd.wv v8, v8, v10 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: call consume +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ret +; +; SAME-VTYPE-FIRST-BIDIRECTIONAL-LABEL: test: +; SAME-VTYPE-FIRST-BIDIRECTIONAL: # %bb.0: # %entry +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, -16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_def_cfa_offset 16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_offset ra, -8 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli a2, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v12, v10, v11 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v10, v10, v11 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e64, m1, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: div a2, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a3, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: mul a0, a0, a1 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a3 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v11, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v8, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a2 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v8, v11, v8 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v9, v12, v10 +; 
SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vwadd.wv v8, v8, v9 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: call consume +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, 16 +; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ret entry: %0 = add <vscale x 1 x i64> %v64_0, %v64_1 + %scalar0 = add i64 %a, %b %1 = add <vscale x 1 x i32> %v32_0, %v32_1 %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1 + %scalar1 = mul i64 %a, %b %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1 %4 = add <vscale x 1 x i64> %0, %2 + %scalar2 = sdiv i64 %a, %b %5 = add <vscale x 1 x i32> %1, %3 %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64> + %scalar3 = add i64 %scalar0, %scalar1 %7 = add <vscale x 1 x i64> %4, %6 - ret <vscale x 1 x i64> %7 + %scalar4 = add i64 %scalar2, %scalar3 + call void @consume(i64 %scalar4, <vscale x 1 x i64> %7) + ret void } >From b86faa86e6e8e661e460ccbe20048141ff0b2c13 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Tue, 2 Dec 2025 17:00:12 +0800 Subject: [PATCH 3/8] Revert llvm/lib/CodeGen/MachineScheduler.cpp change Created using spr 1.3.6-beta.1 --- llvm/lib/CodeGen/MachineScheduler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index efe64788c5efa..de29a9fab876e 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2964,7 +2964,7 @@ void SchedBoundary::bumpNode(SUnit *SU) { unsigned NextCycle = CurrCycle; switch (SchedModel->getMicroOpBufferSize()) { case 0: - // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); break; case 1: if (ReadyCycle > NextCycle) { >From d1578900fc951d219904b3cde2d9f7ae46bce381 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Sun, 4 Jan 2026 14:44:13 +0800 Subject: [PATCH 4/8] Remove private, add EnableVTypeSchedHeuristic 
option and rework tests Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVMachineScheduler.cpp | 10 +- llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 8 +- llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +- llvm/lib/Target/RISCV/RISCVSubtarget.h | 6 + .../RISCV/rvv/rvv-vtype-based-scheduler.ll | 555 +----------------- 5 files changed, 46 insertions(+), 537 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index e7035ce399bd3..32c95082ccf1e 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -18,6 +18,12 @@ static cl::opt<bool> EnableVTypeSchedHeuristic( cl::desc("Enable scheduling RVV instructions based on vtype heuristic " "(pick instruction with compatible vtype first)")); +bool RISCVPreRAMachineSchedStrategy::enableVTypeSchedHeuristic() const { + if (EnableVTypeSchedHeuristic.getNumOccurrences() > 0) + return EnableVTypeSchedHeuristic; + return ST->enableVTypeSchedHeuristic(); +} + RISCV::VSETVLIInfo RISCVPreRAMachineSchedStrategy::getVSETVLIInfo(const MachineInstr *MI) const { unsigned TSFlags = MI->getDesc().TSFlags; @@ -177,7 +183,7 @@ bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand, // TODO: We should not use `CandReason::Cluster` here, but is there a // mechanism to extend this enum? 
- if (EnableVTypeSchedHeuristic && + if (enableVTypeSchedHeuristic() && tryVType(getVSETVLIInfo(TryCand.SU->getInstr()), getVSETVLIInfo(Cand.SU->getInstr()), TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -197,7 +203,7 @@ void RISCVPreRAMachineSchedStrategy::leaveMBB() { void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { GenericScheduler::schedNode(SU, IsTopNode); - if (EnableVTypeSchedHeuristic) { + if (enableVTypeSchedHeuristic()) { MachineInstr *MI = SU->getInstr(); const RISCV::VSETVLIInfo &Info = getVSETVLIInfo(MI); if (Info.isValid()) { diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h index 4bd07871a0b97..a4a439f489c2b 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h @@ -21,11 +21,13 @@ namespace llvm { /// A GenericScheduler implementation for RISCV pre RA scheduling. class RISCVPreRAMachineSchedStrategy : public GenericScheduler { -private: + const RISCVSubtarget *ST; RISCV::RISCVVSETVLIInfoAnalysis VIA; RISCV::VSETVLIInfo TopVType; RISCV::VSETVLIInfo BottomVType; + bool enableVTypeSchedHeuristic() const; + RISCV::VSETVLIInfo getVSETVLIInfo(const MachineInstr *MI) const; bool tryVType(RISCV::VSETVLIInfo TryVType, RISCV::VSETVLIInfo CandVtype, SchedCandidate &TryCand, SchedCandidate &Cand, @@ -33,8 +35,8 @@ class RISCVPreRAMachineSchedStrategy : public GenericScheduler { public: RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C) - : GenericScheduler(C), - VIA(&C->MF->getSubtarget<RISCVSubtarget>(), C->LIS) {} + : GenericScheduler(C), ST(&C->MF->getSubtarget<RISCVSubtarget>()), + VIA(ST, C->LIS) {} protected: bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 5becfd2ad502b..7c151fcebcbdf 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ 
b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -45,6 +45,8 @@ class RISCVTuneInfo { // The direction of PostRA scheduling. code PostRASchedDirection = TopDown; + + bit EnableVTypeSchedHeuristic = 0; } def RISCVTuneInfoTable : GenericTable { @@ -58,7 +60,7 @@ def RISCVTuneInfoTable : GenericTable { "MaxStoresPerMemcpyOptSize", "MaxStoresPerMemcpy", "MaxStoresPerMemmoveOptSize", "MaxStoresPerMemmove", "MaxLoadsPerMemcmpOptSize", "MaxLoadsPerMemcmp", - "PostRASchedDirection"]; + "PostRASchedDirection", "EnableVTypeSchedHeuristic"]; } def getRISCVTuneInfo : SearchIndex { diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index ae6ca970c0c49..c4f0705d6864e 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -70,6 +70,8 @@ struct RISCVTuneInfo { // The direction of PostRA scheduling. MISched::Direction PostRASchedDirection; + + bool EnableVTypeSchedHeuristic; }; #define GET_RISCVTuneInfoTable_DECL @@ -433,6 +435,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { return TuneInfo->PostRASchedDirection; } + bool enableVTypeSchedHeuristic() const { + return TuneInfo->EnableVTypeSchedHeuristic; + } + void overrideSchedPolicy(MachineSchedPolicy &Policy, const SchedRegion &Region) const override; diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll index 8c48937e573ea..960906bae316d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=DEFAULT ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -misched-prera-direction=bottomup \ @@ -11,388 +10,18 @@ ; RUN: -riscv-enable-vtype-sched-heuristic 
-verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=VTYPE-SCHED-BIDIRECTIONAL -define void @test(i16 %0, i16 %1, i16 %2, i16 %3, i16 %4, i16 %5, i16 %6, ptr %7, ptr %8, ptr %9, ptr %10, ptr %11, i32 %12) { -; DEFAULT-LABEL: test: -; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: ld a6, 0(sp) -; DEFAULT-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; DEFAULT-NEXT: vle8.v v9, (a7) -; DEFAULT-NEXT: ld a7, 8(sp) -; DEFAULT-NEXT: vle8.v v8, (a6) -; DEFAULT-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; DEFAULT-NEXT: vslidedown.vi v10, v9, 1 -; DEFAULT-NEXT: ld a6, 16(sp) -; DEFAULT-NEXT: vslidedown.vi v11, v8, 1 -; DEFAULT-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; DEFAULT-NEXT: vle8.v v12, (a7) -; DEFAULT-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; DEFAULT-NEXT: vslidedown.vi v13, v12, 1 -; DEFAULT-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; DEFAULT-NEXT: vle8.v v14, (a6) -; DEFAULT-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; DEFAULT-NEXT: vslidedown.vi v15, v14, 1 -; DEFAULT-NEXT: vslidedown.vi v16, v9, 2 -; DEFAULT-NEXT: vslidedown.vi v17, v8, 2 -; DEFAULT-NEXT: vslidedown.vi v18, v12, 2 -; DEFAULT-NEXT: vslidedown.vi v19, v14, 2 -; DEFAULT-NEXT: vslidedown.vi v20, v9, 3 -; DEFAULT-NEXT: vslidedown.vi v21, v8, 3 -; DEFAULT-NEXT: vslidedown.vi v22, v12, 3 -; DEFAULT-NEXT: vslidedown.vi v23, v14, 3 -; DEFAULT-NEXT: vslidedown.vi v24, v9, 4 -; DEFAULT-NEXT: vslidedown.vi v25, v8, 4 -; DEFAULT-NEXT: vslidedown.vi v26, v12, 4 -; DEFAULT-NEXT: vslidedown.vi v27, v14, 4 -; DEFAULT-NEXT: vslidedown.vi v28, v9, 5 -; DEFAULT-NEXT: vslidedown.vi v29, v8, 5 -; DEFAULT-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; DEFAULT-NEXT: vmv.v.i v30, 0 -; DEFAULT-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; DEFAULT-NEXT: vslidedown.vi v31, v12, 5 -; DEFAULT-NEXT: vmv1r.v v7, v30 -; DEFAULT-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; DEFAULT-NEXT: vwmaccsu.vx v7, a0, v9 -; DEFAULT-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; DEFAULT-NEXT: vslidedown.vi v9, v14, 5 -; DEFAULT-NEXT: vmv1r.v v6, v30 -; 
DEFAULT-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; DEFAULT-NEXT: vwmaccsu.vx v6, a0, v8 -; DEFAULT-NEXT: vwmaccsu.vx v7, a1, v10 -; DEFAULT-NEXT: vmv1r.v v8, v30 -; DEFAULT-NEXT: vwmaccsu.vx v8, a0, v12 -; DEFAULT-NEXT: vwmaccsu.vx v6, a1, v11 -; DEFAULT-NEXT: vwmaccsu.vx v7, a2, v16 -; DEFAULT-NEXT: vwmaccsu.vx v30, a0, v14 -; DEFAULT-NEXT: vwmaccsu.vx v8, a1, v13 -; DEFAULT-NEXT: vwmaccsu.vx v6, a2, v17 -; DEFAULT-NEXT: vwmaccsu.vx v7, a3, v20 -; DEFAULT-NEXT: vwmaccsu.vx v30, a1, v15 -; DEFAULT-NEXT: vwmaccsu.vx v8, a2, v18 -; DEFAULT-NEXT: vwmaccsu.vx v6, a3, v21 -; DEFAULT-NEXT: vwmaccsu.vx v7, a4, v24 -; DEFAULT-NEXT: vwmaccsu.vx v30, a2, v19 -; DEFAULT-NEXT: vwmaccsu.vx v8, a3, v22 -; DEFAULT-NEXT: vwmaccsu.vx v6, a4, v25 -; DEFAULT-NEXT: vwmaccsu.vx v7, a5, v28 -; DEFAULT-NEXT: vwmaccsu.vx v30, a3, v23 -; DEFAULT-NEXT: vwmaccsu.vx v8, a4, v26 -; DEFAULT-NEXT: vwmaccsu.vx v6, a5, v29 -; DEFAULT-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; DEFAULT-NEXT: vmax.vx v10, v7, zero -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; DEFAULT-NEXT: vwmaccsu.vx v30, a4, v27 -; DEFAULT-NEXT: vwmaccsu.vx v8, a5, v31 -; DEFAULT-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; DEFAULT-NEXT: vmax.vx v11, v6, zero -; DEFAULT-NEXT: csrwi vxrm, 0 -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; DEFAULT-NEXT: vnclipu.wi v10, v10, 6 -; DEFAULT-NEXT: vwmaccsu.vx v30, a5, v9 -; DEFAULT-NEXT: ld a0, 24(sp) -; DEFAULT-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; DEFAULT-NEXT: vmax.vx v8, v8, zero -; DEFAULT-NEXT: lw a1, 32(sp) -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; DEFAULT-NEXT: vnclipu.wi v9, v11, 6 -; DEFAULT-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; DEFAULT-NEXT: vmax.vx v11, v30, zero -; DEFAULT-NEXT: vse8.v v10, (a0) -; DEFAULT-NEXT: add a2, a0, a1 -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; DEFAULT-NEXT: vnclipu.wi v8, v8, 6 -; DEFAULT-NEXT: vse8.v v9, (a2) -; DEFAULT-NEXT: sh1add a2, a1, a0 -; DEFAULT-NEXT: vnclipu.wi v9, v11, 6 -; 
DEFAULT-NEXT: sh1add a1, a1, a1 -; DEFAULT-NEXT: vse8.v v8, (a2) -; DEFAULT-NEXT: add a0, a0, a1 -; DEFAULT-NEXT: vse8.v v9, (a0) -; DEFAULT-NEXT: ret -; -; VTYPE-SCHED-BOTTOMUP-LABEL: test: -; VTYPE-SCHED-BOTTOMUP: # %bb.0: # %entry -; VTYPE-SCHED-BOTTOMUP-NEXT: ld a6, 0(sp) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vle8.v v9, (a7) -; VTYPE-SCHED-BOTTOMUP-NEXT: ld a7, 8(sp) -; VTYPE-SCHED-BOTTOMUP-NEXT: vle8.v v8, (a6) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v10, v9, 1 -; VTYPE-SCHED-BOTTOMUP-NEXT: ld a6, 16(sp) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vle8.v v11, (a7) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v12, v8, 1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vle8.v v13, (a6) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v14, v11, 1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v15, v13, 1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v16, v9, 2 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v17, v8, 2 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v18, v11, 2 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v19, v13, 2 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v20, v9, 3 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v21, v8, 3 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v22, v11, 3 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v23, v13, 3 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v24, v9, 4 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v25, v8, 4 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v26, v11, 4 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v27, v13, 4 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v28, v9, 5 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v29, v8, 5 -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v30, v11, 5 -; 
VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.v.i v31, 0 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vslidedown.vi v7, v13, 5 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv1r.v v6, v31 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a0, v9 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv1r.v v9, v31 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a0, v8 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a1, v10 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv1r.v v8, v31 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a0, v11 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a0, v13 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a1, v12 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a2, v16 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a1, v14 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a1, v15 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a2, v17 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a3, v20 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a2, v18 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a2, v19 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a3, v21 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a4, v24 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a3, v22 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a3, v23 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a4, v25 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v6, a5, v28 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a4, v26 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a4, v27 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v9, a5, v29 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmax.vx v10, v6, zero -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v8, a5, v30 -; VTYPE-SCHED-BOTTOMUP-NEXT: vwmaccsu.vx v31, a5, v7 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; 
VTYPE-SCHED-BOTTOMUP-NEXT: vmax.vx v9, v9, zero -; VTYPE-SCHED-BOTTOMUP-NEXT: csrwi vxrm, 0 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vnclipu.wi v10, v10, 6 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmax.vx v8, v8, zero -; VTYPE-SCHED-BOTTOMUP-NEXT: ld a0, 24(sp) -; VTYPE-SCHED-BOTTOMUP-NEXT: vmax.vx v11, v31, zero -; VTYPE-SCHED-BOTTOMUP-NEXT: lw a1, 32(sp) -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vnclipu.wi v9, v9, 6 -; VTYPE-SCHED-BOTTOMUP-NEXT: vnclipu.wi v8, v8, 6 -; VTYPE-SCHED-BOTTOMUP-NEXT: vse8.v v10, (a0) -; VTYPE-SCHED-BOTTOMUP-NEXT: vnclipu.wi v10, v11, 6 -; VTYPE-SCHED-BOTTOMUP-NEXT: add a2, a0, a1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vse8.v v9, (a2) -; VTYPE-SCHED-BOTTOMUP-NEXT: sh1add a2, a1, a0 -; VTYPE-SCHED-BOTTOMUP-NEXT: sh1add a1, a1, a1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vse8.v v8, (a2) -; VTYPE-SCHED-BOTTOMUP-NEXT: add a0, a0, a1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vse8.v v10, (a0) -; VTYPE-SCHED-BOTTOMUP-NEXT: ret -; -; VTYPE-SCHED-TOPDOWN-LABEL: test: -; VTYPE-SCHED-TOPDOWN: # %bb.0: # %entry -; VTYPE-SCHED-TOPDOWN-NEXT: lw t1, 32(sp) -; VTYPE-SCHED-TOPDOWN-NEXT: ld a6, 24(sp) -; VTYPE-SCHED-TOPDOWN-NEXT: ld t2, 16(sp) -; VTYPE-SCHED-TOPDOWN-NEXT: ld t3, 8(sp) -; VTYPE-SCHED-TOPDOWN-NEXT: ld t4, 0(sp) -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vle8.v v12, (a7) -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.v.i v8, 0 -; VTYPE-SCHED-TOPDOWN-NEXT: csrwi vxrm, 0 -; VTYPE-SCHED-TOPDOWN-NEXT: sh1add t5, t1, t1 -; VTYPE-SCHED-TOPDOWN-NEXT: add t0, a6, t1 -; VTYPE-SCHED-TOPDOWN-NEXT: sh1add a7, t1, a6 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vle8.v v13, (t4) -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi 
v16, v12, 1 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv1r.v v9, v8 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv1r.v v10, v8 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv1r.v v11, v8 -; VTYPE-SCHED-TOPDOWN-NEXT: add t1, a6, t5 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vle8.v v14, (t2) -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a0, v12 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a0, v13 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v17, v13, 1 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vle8.v v15, (t3) -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a0, v14 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a0, v15 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a1, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a1, v17 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v16, v15, 1 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v17, v14, 1 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v18, v12, 2 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a1, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a1, v17 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v16, v13, 2 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v17, v15, 2 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v19, v14, 2 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a2, v18 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a2, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a2, v17 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a2, v19 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v16, v12, 3 -; 
VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v17, v13, 3 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v18, v15, 3 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v19, v14, 3 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a3, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a3, v17 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a3, v18 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a3, v19 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v16, v12, 4 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v17, v13, 4 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v18, v15, 4 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v19, v14, 4 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a4, v17 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a4, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a4, v18 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a4, v19 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v12, v12, 5 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v13, v13, 5 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v15, v15, 5 -; VTYPE-SCHED-TOPDOWN-NEXT: vslidedown.vi v14, v14, 5 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v9, a5, v12 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v10, a5, v13 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v11, a5, v15 -; VTYPE-SCHED-TOPDOWN-NEXT: vwmaccsu.vx v8, a5, v14 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmax.vx v9, v9, zero -; VTYPE-SCHED-TOPDOWN-NEXT: vmax.vx v10, v10, zero -; VTYPE-SCHED-TOPDOWN-NEXT: vmax.vx v11, v11, zero -; VTYPE-SCHED-TOPDOWN-NEXT: vmax.vx v8, v8, zero -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vnclipu.wi v9, v9, 6 -; VTYPE-SCHED-TOPDOWN-NEXT: vnclipu.wi v10, v10, 6 -; 
VTYPE-SCHED-TOPDOWN-NEXT: vnclipu.wi v11, v11, 6 -; VTYPE-SCHED-TOPDOWN-NEXT: vse8.v v9, (a6) -; VTYPE-SCHED-TOPDOWN-NEXT: vnclipu.wi v8, v8, 6 -; VTYPE-SCHED-TOPDOWN-NEXT: vse8.v v10, (t0) -; VTYPE-SCHED-TOPDOWN-NEXT: vse8.v v11, (a7) -; VTYPE-SCHED-TOPDOWN-NEXT: vse8.v v8, (t1) -; VTYPE-SCHED-TOPDOWN-NEXT: ret -; -; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test: -; VTYPE-SCHED-BIDIRECTIONAL: # %bb.0: # %entry -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ld a6, 0(sp) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle8.v v8, (a7) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ld a7, 8(sp) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle8.v v9, (a6) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v10, v8, 1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ld a6, 16(sp) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v11, v9, 1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle8.v v12, (a7) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v13, v12, 1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle8.v v14, (a6) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v15, v14, 1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v16, v8, 2 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v17, v9, 2 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v18, v12, 2 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v19, v14, 2 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v20, v8, 3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v21, v9, 3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v22, v12, 3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v23, v14, 3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v24, v8, 4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: 
vslidedown.vi v25, v9, 4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v26, v12, 4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v27, v14, 4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v28, v8, 5 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v29, v9, 5 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v30, v12, 5 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.v.i v31, 0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vslidedown.vi v7, v14, 5 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv1r.v v6, v31 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a0, v8 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv1r.v v8, v31 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a0, v9 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a1, v10 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv1r.v v9, v31 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a0, v12 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a0, v14 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a1, v11 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a2, v16 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a1, v13 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a1, v15 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a2, v17 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a3, v20 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a2, v18 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a2, v19 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a3, v21 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a4, v24 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a3, v22 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a3, v23 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a4, v25 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v6, a5, v28 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a4, v26 -; 
VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a4, v27 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v8, a5, v29 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmax.vx v10, v6, zero -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v9, a5, v30 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vwmaccsu.vx v31, a5, v7 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmax.vx v8, v8, zero -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: csrwi vxrm, 0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vnclipu.wi v10, v10, 6 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ld a0, 24(sp) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmax.vx v9, v9, zero -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: lw a1, 32(sp) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmax.vx v11, v31, zero -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vnclipu.wi v8, v8, 6 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vse8.v v10, (a0) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vnclipu.wi v9, v9, 6 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: add a2, a0, a1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: sh1add a3, a1, a0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vse8.v v8, (a2) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vnclipu.wi v8, v11, 6 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: sh1add a1, a1, a1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vse8.v v9, (a3) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: add a0, a0, a1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vse8.v v8, (a0) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ret +define void @test0(i16 %0, i16 %1, i16 %2, i16 %3, i16 %4, i16 %5, i16 %6, ptr %7, ptr %8, ptr %9, ptr %10, ptr %11, i32 %12) { +; DEFAULT-LABEL: test0: +; DEFAULT-COUNT-19: vset + +; VTYPE-SCHED-BOTTOMUP-LABEL: test0: +; VTYPE-SCHED-BOTTOMUP-COUNT-15: vset + +; 
VTYPE-SCHED-TOPDOWN-LABEL: test0: +; VTYPE-SCHED-TOPDOWN-COUNT-19: vset + +; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test0: +; VTYPE-SCHED-BIDIRECTIONAL-COUNT-15: vset entry: %14 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.p0.i64(<vscale x 8 x i8> poison, ptr %7, i64 16) %15 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.p0.i64(<vscale x 8 x i8> poison, ptr %8, i64 16) @@ -496,154 +125,18 @@ entry: ret void } -define void @foo(ptr %0, ptr %1, ptr %2, ptr %3, ptr %4) { -; DEFAULT-LABEL: foo: -; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; DEFAULT-NEXT: vle64.v v8, (a1) -; DEFAULT-NEXT: vle64.v v10, (a2) -; DEFAULT-NEXT: vmsltu.vv v12, v8, v10 -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; DEFAULT-NEXT: vmv.x.s a1, v12 -; DEFAULT-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; DEFAULT-NEXT: vmseq.vv v14, v8, v10 -; DEFAULT-NEXT: vle64.v v12, (a3) -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; DEFAULT-NEXT: vmv.x.s a2, v14 -; DEFAULT-NEXT: vle64.v v14, (a4) -; DEFAULT-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; DEFAULT-NEXT: vmsltu.vv v16, v12, v14 -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; DEFAULT-NEXT: vmv.x.s a3, v16 -; DEFAULT-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; DEFAULT-NEXT: vmseq.vv v16, v12, v14 -; DEFAULT-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; DEFAULT-NEXT: vmv.x.s a4, v16 -; DEFAULT-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; DEFAULT-NEXT: vsub.vv v8, v8, v10 -; DEFAULT-NEXT: sh1add a1, a1, a2 -; DEFAULT-NEXT: xor a1, a1, a2 -; DEFAULT-NEXT: vmv.s.x v0, a1 -; DEFAULT-NEXT: vmv.v.i v10, 0 -; DEFAULT-NEXT: vmerge.vim v16, v10, 1, v0 -; DEFAULT-NEXT: vsub.vv v8, v8, v16 -; DEFAULT-NEXT: sh1add a1, a3, a4 -; DEFAULT-NEXT: xor a1, a1, a4 -; DEFAULT-NEXT: vmv.s.x v0, a1 -; DEFAULT-NEXT: vsub.vv v12, v12, v14 -; DEFAULT-NEXT: vmerge.vim v10, v10, 1, v0 -; DEFAULT-NEXT: vsub.vv v10, v12, v10 -; DEFAULT-NEXT: vand.vv v8, v10, v8 -; DEFAULT-NEXT: vse64.v v8, (a0) -; DEFAULT-NEXT: ret -; 
-; VTYPE-SCHED-BOTTOMUP-LABEL: foo: -; VTYPE-SCHED-BOTTOMUP: # %bb.0: # %entry -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vle64.v v12, (a1) -; VTYPE-SCHED-BOTTOMUP-NEXT: vle64.v v14, (a2) -; VTYPE-SCHED-BOTTOMUP-NEXT: vle64.v v8, (a3) -; VTYPE-SCHED-BOTTOMUP-NEXT: vle64.v v10, (a4) -; VTYPE-SCHED-BOTTOMUP-NEXT: vmsltu.vv v16, v12, v14 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmsltu.vv v17, v8, v10 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.x.s a1, v16 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.x.s a2, v17 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmseq.vv v16, v12, v14 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmseq.vv v17, v8, v10 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.x.s a3, v16 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.x.s a4, v17 -; VTYPE-SCHED-BOTTOMUP-NEXT: sh1add a1, a1, a3 -; VTYPE-SCHED-BOTTOMUP-NEXT: xor a1, a1, a3 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-BOTTOMUP-NEXT: vsub.vv v12, v12, v14 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.v.i v14, 0 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmerge.vim v16, v14, 1, v0 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsub.vv v12, v12, v16 -; VTYPE-SCHED-BOTTOMUP-NEXT: sh1add a1, a2, a4 -; VTYPE-SCHED-BOTTOMUP-NEXT: xor a1, a1, a4 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsub.vv v8, v8, v10 -; VTYPE-SCHED-BOTTOMUP-NEXT: vmerge.vim v10, v14, 1, v0 -; VTYPE-SCHED-BOTTOMUP-NEXT: vsub.vv v8, v8, v10 -; VTYPE-SCHED-BOTTOMUP-NEXT: vand.vv v8, v8, v12 -; VTYPE-SCHED-BOTTOMUP-NEXT: vse64.v v8, (a0) -; VTYPE-SCHED-BOTTOMUP-NEXT: ret -; -; VTYPE-SCHED-TOPDOWN-LABEL: foo: -; VTYPE-SCHED-TOPDOWN: # %bb.0: # %entry -; VTYPE-SCHED-TOPDOWN-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vle64.v v10, (a1) -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.v.i v8, 0 -; 
VTYPE-SCHED-TOPDOWN-NEXT: vle64.v v12, (a2) -; VTYPE-SCHED-TOPDOWN-NEXT: vle64.v v14, (a3) -; VTYPE-SCHED-TOPDOWN-NEXT: vmsltu.vv v18, v10, v12 -; VTYPE-SCHED-TOPDOWN-NEXT: vmseq.vv v19, v10, v12 -; VTYPE-SCHED-TOPDOWN-NEXT: vle64.v v16, (a4) -; VTYPE-SCHED-TOPDOWN-NEXT: vsub.vv v10, v10, v12 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.x.s a1, v18 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.x.s a2, v19 -; VTYPE-SCHED-TOPDOWN-NEXT: sh1add a1, a1, a2 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmsltu.vv v18, v14, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: xor a1, a1, a2 -; VTYPE-SCHED-TOPDOWN-NEXT: vsub.vv v12, v14, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vmseq.vv v19, v14, v16 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.x.s a1, v18 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.x.s a2, v19 -; VTYPE-SCHED-TOPDOWN-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-TOPDOWN-NEXT: vmerge.vim v14, v8, 1, v0 -; VTYPE-SCHED-TOPDOWN-NEXT: sh1add a1, a1, a2 -; VTYPE-SCHED-TOPDOWN-NEXT: xor a1, a1, a2 -; VTYPE-SCHED-TOPDOWN-NEXT: vsub.vv v10, v10, v14 -; VTYPE-SCHED-TOPDOWN-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-TOPDOWN-NEXT: vmerge.vim v8, v8, 1, v0 -; VTYPE-SCHED-TOPDOWN-NEXT: vsub.vv v8, v12, v8 -; VTYPE-SCHED-TOPDOWN-NEXT: vand.vv v8, v8, v10 -; VTYPE-SCHED-TOPDOWN-NEXT: vse64.v v8, (a0) -; VTYPE-SCHED-TOPDOWN-NEXT: ret -; -; VTYPE-SCHED-BIDIRECTIONAL-LABEL: foo: -; VTYPE-SCHED-BIDIRECTIONAL: # %bb.0: # %entry -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle64.v v12, (a1) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle64.v v14, (a2) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle64.v v8, (a3) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmsltu.vv v16, v12, v14 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vle64.v v10, (a4) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmsltu.vv v17, v8, v10 -; 
VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.x.s a1, v16 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.x.s a2, v17 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmseq.vv v16, v12, v14 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmseq.vv v17, v8, v10 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.x.s a3, v16 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.x.s a4, v17 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: sh1add a1, a1, a3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: xor a1, a1, a3 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsub.vv v12, v12, v14 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.v.i v14, 0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmerge.vim v16, v14, 1, v0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsub.vv v12, v12, v16 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: sh1add a1, a2, a4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: xor a1, a1, a4 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmv.s.x v0, a1 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsub.vv v8, v8, v10 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vmerge.vim v10, v14, 1, v0 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vsub.vv v8, v8, v10 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vand.vv v8, v8, v12 -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: vse64.v v8, (a0) -; VTYPE-SCHED-BIDIRECTIONAL-NEXT: ret +define void @test1(ptr %0, ptr %1, ptr %2, ptr %3, ptr %4) { +; DEFAULT-LABEL: test1: +; DEFAULT-COUNT-9: vset + +; VTYPE-SCHED-BOTTOMUP-LABEL: test1: +; VTYPE-SCHED-BOTTOMUP-COUNT-5: vset + +; VTYPE-SCHED-TOPDOWN-LABEL: test1: +; VTYPE-SCHED-TOPDOWN-COUNT-5: vset + +; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test1: +; VTYPE-SCHED-BIDIRECTIONAL-COUNT-5: vset entry: %5 = load <8 x i64>, ptr %1, align 64 %6 = load <8 x i64>, ptr %2, align 64 >From 4cfb31e42ee10c00ec24c42a45ca6fd1c117a9fb Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: 
Fri, 9 Jan 2026 14:29:19 +0800 Subject: [PATCH 5/8] It should be related to both vtype and vl Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVFeatures.td | 6 +++--- llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp | 8 ++++---- llvm/test/CodeGen/RISCV/features-info.ll | 2 +- llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 5507dda807a69..f5f54665c5a42 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1843,9 +1843,9 @@ def TuneDisableLatencySchedHeuristic : SubtargetFeature<"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; -def TuneEnableVTypeSchedHeuristic - : SubtargetFeature<"enable-vtype-sched-heuristic", "EnableVTypeSchedHeuristic", "true", - "Enable vtype-based scheduling heuristic">; +def TuneEnableVsetvliSchedHeuristic + : SubtargetFeature<"enable-vsetvli-sched-heuristic", "EnableVsetvliSchedHeuristic", "true", + "Enable vsetvli-based scheduling heuristic">; def TunePredictableSelectIsExpensive : SubtargetFeature<"predictable-select-expensive", "PredictableSelectIsExpensive", "true", diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index 0b302c1669975..77204a668f38e 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -26,14 +26,14 @@ bool RISCVPreRAMachineSchedStrategy::tryVType(RISCV::VSETVLIInfo TryVType, SchedCandidate &TryCand, SchedCandidate &Cand, CandReason Reason) const { - // Do not compare the vtype changes between top and bottom + // Do not compare the vsetvli info changes between top and bottom // boundary. if (Cand.AtTop != TryCand.AtTop) return false; // Try Cand first. 
// We prefer the top node as it is straightforward from the perspective of - // vtype dataflow. + // vsetvli dataflow. if (CandVtype.isValid() && TopVType.isValid() && Cand.AtTop && CandVtype == TopVType) return true; @@ -172,7 +172,7 @@ bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand, // TODO: We should not use `CandReason::Cluster` here, but is there a // mechanism to extend this enum? - if (ST->enableVTypeSchedHeuristic() && + if (ST->enableVsetvliSchedHeuristic() && tryVType(getVSETVLIInfo(TryCand.SU->getInstr()), getVSETVLIInfo(Cand.SU->getInstr()), TryCand, Cand, Cluster)) return TryCand.Reason != NoCand; @@ -192,7 +192,7 @@ void RISCVPreRAMachineSchedStrategy::leaveMBB() { void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { GenericScheduler::schedNode(SU, IsTopNode); - if (ST->enableVTypeSchedHeuristic()) { + if (ST->enableVsetvliSchedHeuristic()) { MachineInstr *MI = SU->getInstr(); const RISCV::VSETVLIInfo &Info = getVSETVLIInfo(MI); if (Info.isValid()) { diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll index 4a6c7d0ee7a43..99168e3f16a75 100644 --- a/llvm/test/CodeGen/RISCV/features-info.ll +++ b/llvm/test/CodeGen/RISCV/features-info.ll @@ -23,7 +23,7 @@ ; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler. ; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN. ; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs). -; CHECK-NEXT: enable-vtype-sched-heuristic - Enable vtype-based scheduling heuristic. +; CHECK-NEXT: enable-vsetvli-sched-heuristic - Enable vsetvli-based scheduling heuristic. ; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation). ; CHECK-NEXT: experimental - Experimental intrinsics. ; CHECK-NEXT: experimental-p - 'P' ('Base P' (Packed SIMD)). 
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll index f872a9c33ef8d..ec34341cc1984 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll @@ -1,13 +1,13 @@ ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=DEFAULT ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -misched-prera-direction=bottomup \ -; RUN: -mattr=+enable-vtype-sched-heuristic -verify-machineinstrs < %s \ +; RUN: -mattr=+enable-vsetvli-sched-heuristic -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=VTYPE-SCHED-BOTTOMUP ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -misched-prera-direction=topdown \ -; RUN: -mattr=+enable-vtype-sched-heuristic -verify-machineinstrs < %s \ +; RUN: -mattr=+enable-vsetvli-sched-heuristic -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=VTYPE-SCHED-TOPDOWN ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -misched-prera-direction=bidirectional \ -; RUN: -mattr=+enable-vtype-sched-heuristic -verify-machineinstrs < %s \ +; RUN: -mattr=+enable-vsetvli-sched-heuristic -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefix=VTYPE-SCHED-BIDIRECTIONAL define void @test0(i16 %0, i16 %1, i16 %2, i16 %3, i16 %4, i16 %5, i16 %6, ptr %7, ptr %8, ptr %9, ptr %10, ptr %11, i32 %12) { >From 1c5a91ca3c835934f8bec1aced1c22af4ec7c609 Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Fri, 9 Jan 2026 14:35:25 +0800 Subject: [PATCH 6/8] Rename variables Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVMachineScheduler.cpp | 43 ++++++++++--------- llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 10 ++--- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index 77204a668f38e..52b81927c17de 100644 
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -21,11 +21,11 @@ RISCVPreRAMachineSchedStrategy::getVSETVLIInfo(const MachineInstr *MI) const { return VIA.computeInfoForInstr(*MI); } -bool RISCVPreRAMachineSchedStrategy::tryVType(RISCV::VSETVLIInfo TryVType, - RISCV::VSETVLIInfo CandVtype, - SchedCandidate &TryCand, - SchedCandidate &Cand, - CandReason Reason) const { +bool RISCVPreRAMachineSchedStrategy::tryVSETVLIInfo(RISCV::VSETVLIInfo TryInfo, + RISCV::VSETVLIInfo CandInfo, + SchedCandidate &TryCand, + SchedCandidate &Cand, + CandReason Reason) const { // Do not compare the vsetvli info changes between top and bottom // boundary. if (Cand.AtTop != TryCand.AtTop) @@ -34,23 +34,23 @@ bool RISCVPreRAMachineSchedStrategy::tryVType(RISCV::VSETVLIInfo TryVType, // Try Cand first. // We prefer the top node as it is straightforward from the perspective of // vsetvli dataflow. - if (CandVtype.isValid() && TopVType.isValid() && Cand.AtTop && - CandVtype == TopVType) + if (CandInfo.isValid() && TopInfo.isValid() && Cand.AtTop && + CandInfo == TopInfo) return true; - if (CandVtype.isValid() && BottomVType.isValid() && !Cand.AtTop && - CandVtype == BottomVType) + if (CandInfo.isValid() && BottomInfo.isValid() && !Cand.AtTop && + CandInfo == BottomInfo) return true; // Then try TryCand. 
- if (TryVType.isValid() && TopVType.isValid() && TryCand.AtTop && - TryVType == TopVType) { + if (TryInfo.isValid() && TopInfo.isValid() && TryCand.AtTop && + TryInfo == TopInfo) { TryCand.Reason = Reason; return true; } - if (TryVType.isValid() && BottomVType.isValid() && !TryCand.AtTop && - TryVType == BottomVType) { + if (TryInfo.isValid() && BottomInfo.isValid() && !TryCand.AtTop && + TryInfo == BottomInfo) { TryCand.Reason = Reason; return true; } @@ -173,21 +173,22 @@ bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand, // TODO: We should not use `CandReason::Cluster` here, but is there a // mechanism to extend this enum? if (ST->enableVsetvliSchedHeuristic() && - tryVType(getVSETVLIInfo(TryCand.SU->getInstr()), - getVSETVLIInfo(Cand.SU->getInstr()), TryCand, Cand, Cluster)) + tryVSETVLIInfo(getVSETVLIInfo(TryCand.SU->getInstr()), + getVSETVLIInfo(Cand.SU->getInstr()), TryCand, Cand, + Cluster)) return TryCand.Reason != NoCand; return TryCand.Reason != NoCand; } void RISCVPreRAMachineSchedStrategy::enterMBB(MachineBasicBlock *MBB) { - TopVType = RISCV::VSETVLIInfo(); - BottomVType = RISCV::VSETVLIInfo(); + TopInfo = RISCV::VSETVLIInfo(); + BottomInfo = RISCV::VSETVLIInfo(); } void RISCVPreRAMachineSchedStrategy::leaveMBB() { - TopVType = RISCV::VSETVLIInfo(); - BottomVType = RISCV::VSETVLIInfo(); + TopInfo = RISCV::VSETVLIInfo(); + BottomInfo = RISCV::VSETVLIInfo(); } void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { @@ -197,9 +198,9 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { const RISCV::VSETVLIInfo &Info = getVSETVLIInfo(MI); if (Info.isValid()) { if (IsTopNode) - TopVType = Info; + TopInfo = Info; else - BottomVType = Info; + BottomInfo = Info; LLVM_DEBUG({ dbgs() << "Previous scheduled Unit: \n"; dbgs() << " IsTop: " << IsTopNode << "\n"; diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h index 
301dc15e9dc49..2133813d6e25b 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h @@ -23,13 +23,13 @@ namespace llvm { class RISCVPreRAMachineSchedStrategy : public GenericScheduler { const RISCVSubtarget *ST; RISCV::RISCVVSETVLIInfoAnalysis VIA; - RISCV::VSETVLIInfo TopVType; - RISCV::VSETVLIInfo BottomVType; + RISCV::VSETVLIInfo TopInfo; + RISCV::VSETVLIInfo BottomInfo; RISCV::VSETVLIInfo getVSETVLIInfo(const MachineInstr *MI) const; - bool tryVType(RISCV::VSETVLIInfo TryVType, RISCV::VSETVLIInfo CandVtype, - SchedCandidate &TryCand, SchedCandidate &Cand, - CandReason Reason) const; + bool tryVSETVLIInfo(RISCV::VSETVLIInfo TryInfo, RISCV::VSETVLIInfo CandInfo, + SchedCandidate &TryCand, SchedCandidate &Cand, + CandReason Reason) const; public: RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C) >From 06165daa22d869af0fb5da1b73285640a90c7cde Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Fri, 9 Jan 2026 14:51:18 +0800 Subject: [PATCH 7/8] Use isCompatible Created using spr 1.3.6-beta.1 --- .../Target/RISCV/RISCVMachineScheduler.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index 52b81927c17de..c45892cd513d3 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -31,26 +31,29 @@ bool RISCVPreRAMachineSchedStrategy::tryVSETVLIInfo(RISCV::VSETVLIInfo TryInfo, if (Cand.AtTop != TryCand.AtTop) return false; + auto IsCompatible = [&](RISCV::VSETVLIInfo FirstInfo, + RISCV::VSETVLIInfo SecondInfo) { + return FirstInfo.isValid() && SecondInfo.isValid() && + FirstInfo.isCompatible(RISCV::DemandedFields::all(), SecondInfo, + Context->LIS); + }; + // Try Cand first. // We prefer the top node as it is straightforward from the perspective of // vsetvli dataflow. 
- if (CandInfo.isValid() && TopInfo.isValid() && Cand.AtTop && - CandInfo == TopInfo) + if (Cand.AtTop && IsCompatible(CandInfo, TopInfo)) return true; - if (CandInfo.isValid() && BottomInfo.isValid() && !Cand.AtTop && - CandInfo == BottomInfo) + if (!Cand.AtTop && IsCompatible(CandInfo, BottomInfo)) return true; // Then try TryCand. - if (TryInfo.isValid() && TopInfo.isValid() && TryCand.AtTop && - TryInfo == TopInfo) { + if (TryCand.AtTop && IsCompatible(TryInfo, TopInfo)) { TryCand.Reason = Reason; return true; } - if (TryInfo.isValid() && BottomInfo.isValid() && !TryCand.AtTop && - TryInfo == BottomInfo) { + if (!TryCand.AtTop && IsCompatible(TryInfo, BottomInfo)) { TryCand.Reason = Reason; return true; } >From 5541d0e506a2879dcb38d14479d8134a86c035ce Mon Sep 17 00:00:00 2001 From: Wang Pengcheng <[email protected]> Date: Mon, 12 Jan 2026 11:59:03 +0800 Subject: [PATCH 8/8] Add & Created using spr 1.3.6-beta.1 --- llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp | 6 +++--- llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp index 1d7b219eb2dc0..c9285721d8544 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp @@ -22,15 +22,15 @@ RISCVPreRAMachineSchedStrategy::getVSETVLIInfo(const MachineInstr *MI) const { } bool RISCVPreRAMachineSchedStrategy::tryVSETVLIInfo( - const RISCV::VSETVLIInfo TryInfo, const RISCV::VSETVLIInfo CandInfo, + const RISCV::VSETVLIInfo &TryInfo, const RISCV::VSETVLIInfo &CandInfo, SchedCandidate &TryCand, SchedCandidate &Cand, CandReason Reason) const { // Do not compare the vsetvli info changes between top and bottom // boundary. 
if (Cand.AtTop != TryCand.AtTop) return false; - auto IsCompatible = [&](const RISCV::VSETVLIInfo FirstInfo, - const RISCV::VSETVLIInfo SecondInfo) { + auto IsCompatible = [&](const RISCV::VSETVLIInfo &FirstInfo, + const RISCV::VSETVLIInfo &SecondInfo) { return FirstInfo.isValid() && SecondInfo.isValid() && FirstInfo.isCompatible(RISCV::DemandedFields::all(), SecondInfo, Context->LIS); diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h index 86d9715a1da3f..5d5dd3934f0c4 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h +++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h @@ -27,8 +27,8 @@ class RISCVPreRAMachineSchedStrategy : public GenericScheduler { RISCV::VSETVLIInfo BottomInfo; RISCV::VSETVLIInfo getVSETVLIInfo(const MachineInstr *MI) const; - bool tryVSETVLIInfo(const RISCV::VSETVLIInfo TryInfo, - const RISCV::VSETVLIInfo CandInfo, + bool tryVSETVLIInfo(const RISCV::VSETVLIInfo &TryInfo, + const RISCV::VSETVLIInfo &CandInfo, SchedCandidate &TryCand, SchedCandidate &Cand, CandReason Reason) const; _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
