llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> If we have a v_mov_b32 or v_accvgpr_write_b32 with an inline immediate, replace it with a pseudo which writes to the combined AV_* class. This relaxes the operand constraints, which will allow the allocator to inflate the register class to AV_* to potentially avoid spilling. The allocator does not know how to replace an instruction to enable the change of register class. I originally tried to do this by changing all of the places we introduce v_mov_b32 with immediate, but it's a long tail of niche cases that require manual updating. Plus we can restrict this to only run on functions where we know we will be allocating AGPRs. --- Patch is 69.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149292.diff 19 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+1) - (added) llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp (+108) - (added) llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h (+23) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+13) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+2) - (modified) llvm/lib/Target/AMDGPU/CMakeLists.txt (+1) - (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (-1) - (modified) llvm/test/CodeGen/AMDGPU/agpr-remat.ll (+9-9) - (added) llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir (+95) - (modified) llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll (+23-23) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+4) - (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.ll (+4-6) - (modified) llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-mov.mir (+14-14) - (modified) llvm/test/CodeGen/AMDGPU/no-fold-accvgpr-read.mir 
(+13-13) - (modified) llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll (+10-10) - (modified) llvm/test/CodeGen/AMDGPU/spill-vector-superclass.ll (+3-3) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 23f106a9c1d4d..007b481f84960 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -153,6 +153,9 @@ struct AMDGPULowerBufferFatPointersPass const TargetMachine &TM; }; +void initializeAMDGPUPrepareAGPRAllocLegacyPass(PassRegistry &); +extern char &AMDGPUPrepareAGPRAllocLegacyID; + void initializeAMDGPUReserveWWMRegsLegacyPass(PassRegistry &); extern char &AMDGPUReserveWWMRegsLegacyID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 250547acb1ee7..b6c6d927d0e89 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -114,6 +114,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) +MACHINE_FUNCTION_PASS("amdgpu-prepare-agpr-alloc", AMDGPUPrepareAGPRAllocPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) MACHINE_FUNCTION_PASS("amdgpu-wait-sgpr-hazards", AMDGPUWaitSGPRHazardsPass()) MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp new file mode 100644 index 0000000000000..63a21f8cdba4c --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.cpp @@ -0,0 +1,108 @@ +//===-- AMDGPUPrepareAGPRAlloc.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 
with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Make simple transformations to relax register constraints for cases which can +// allocate to AGPRs or VGPRs. Replace materialize of inline immediates into +// AGPR or VGPR with a pseudo with an AV_* class register constraint. This +// allows later passes to inflate the register class if necessary. The register +// allocator does not know to replace instructions to relax constraints. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUPrepareAGPRAlloc.h" +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "SIMachineFunctionInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-prepare-agpr-alloc" + +namespace { + +class AMDGPUPrepareAGPRAllocImpl { +private: + const SIInstrInfo &TII; + MachineRegisterInfo &MRI; + +public: + AMDGPUPrepareAGPRAllocImpl(const GCNSubtarget &ST, MachineRegisterInfo &MRI) + : TII(*ST.getInstrInfo()), MRI(MRI) {} + bool run(MachineFunction &MF); +}; + +class AMDGPUPrepareAGPRAllocLegacy : public MachineFunctionPass { +public: + static char ID; + + AMDGPUPrepareAGPRAllocLegacy() : MachineFunctionPass(ID) { + initializeAMDGPUPrepareAGPRAllocLegacyPass( + *PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { return "AMDGPU Prepare AGPR Alloc"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} // End anonymous namespace. 
+ +INITIALIZE_PASS_BEGIN(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE, + "AMDGPU Prepare AGPR Alloc", false, false) +INITIALIZE_PASS_END(AMDGPUPrepareAGPRAllocLegacy, DEBUG_TYPE, + "AMDGPU Prepare AGPR Alloc", false, false) + +char AMDGPUPrepareAGPRAllocLegacy::ID = 0; + +char &llvm::AMDGPUPrepareAGPRAllocLegacyID = AMDGPUPrepareAGPRAllocLegacy::ID; + +bool AMDGPUPrepareAGPRAllocLegacy::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + return AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF); +} + +PreservedAnalyses +AMDGPUPrepareAGPRAllocPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + AMDGPUPrepareAGPRAllocImpl(ST, MF.getRegInfo()).run(MF); + return PreservedAnalyses::all(); +} + +bool AMDGPUPrepareAGPRAllocImpl::run(MachineFunction &MF) { + if (MRI.isReserved(AMDGPU::AGPR0)) + return false; + + const MCInstrDesc &AVImmPseudo = TII.get(AMDGPU::AV_MOV_B32_IMM_PSEUDO); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 || + MI.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) { + if (TII.isInlineConstant(MI, 1)) { + MI.setDesc(AVImmPseudo); + Changed = true; + } + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h new file mode 100644 index 0000000000000..dc598c98f241b --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUPrepareAGPRAlloc.h @@ -0,0 +1,23 @@ +//===- AMDGPUPrepareAGPRAlloc.h ---------------------------------*- C++- *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class AMDGPUPrepareAGPRAllocPass + : public PassInfoMixin<AMDGPUPrepareAGPRAllocPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPREPAREAGPRALLOC_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 31a80e00edd3b..c865082a1dcea 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -25,6 +25,7 @@ #include "AMDGPUMacroFusion.h" #include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUPreloadKernArgProlog.h" +#include "AMDGPUPrepareAGPRAlloc.h" #include "AMDGPURemoveIncompatibleFunctions.h" #include "AMDGPUReserveWWMRegs.h" #include "AMDGPUResourceUsageAnalysis.h" @@ -499,6 +500,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeGlobalISel(*PR); initializeAMDGPUAsmPrinterPass(*PR); initializeAMDGPUDAGToDAGISelLegacyPass(*PR); + initializeAMDGPUPrepareAGPRAllocLegacyPass(*PR); initializeGCNDPPCombineLegacyPass(*PR); initializeSILowerI1CopiesLegacyPass(*PR); initializeAMDGPUGlobalISelDivergenceLoweringPass(*PR); @@ -1196,6 +1198,7 @@ class GCNPassConfig final : public AMDGPUPassConfig { bool addRegBankSelect() override; void addPreGlobalInstructionSelect() override; bool addGlobalInstructionSelect() override; + void addPreRegAlloc() override; void addFastRegAlloc() override; void addOptimizedRegAlloc() override; @@ -1539,6 +1542,11 @@ void GCNPassConfig::addFastRegAlloc() { TargetPassConfig::addFastRegAlloc(); } +void GCNPassConfig::addPreRegAlloc() { + if 
(getOptLevel() != CodeGenOptLevel::None) + addPass(&AMDGPUPrepareAGPRAllocLegacyID); +} + void GCNPassConfig::addOptimizedRegAlloc() { if (EnableDCEInRA) insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID); @@ -2235,6 +2243,11 @@ void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( Base::addOptimizedRegAlloc(addPass); } +void AMDGPUCodeGenPassBuilder::addPreRegAlloc(AddMachinePass &addPass) const { + if (getOptLevel() != CodeGenOptLevel::None) + addPass(AMDGPUPrepareAGPRAllocPass()); +} + Error AMDGPUCodeGenPassBuilder::addRegAssignmentOptimized( AddMachinePass &addPass) const { // TODO: Check --regalloc-npm option diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 3b2f39c14a9bc..e0f1296ddded8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -181,7 +181,9 @@ class AMDGPUCodeGenPassBuilder void addMachineSSAOptimization(AddMachinePass &) const; void addPostRegAlloc(AddMachinePass &) const; void addPreEmitPass(AddMachinePass &) const; + void addPreEmitRegAlloc(AddMachinePass &) const; Error addRegAssignmentOptimized(AddMachinePass &) const; + void addPreRegAlloc(AddMachinePass &) const; void addOptimizedRegAlloc(AddMachinePass &) const; void addPreSched2(AddMachinePass &) const; diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index e3519f192137c..42edec0d01493 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -74,6 +74,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPULowerKernelArguments.cpp AMDGPULowerKernelAttributes.cpp AMDGPULowerModuleLDSPass.cpp + AMDGPUPrepareAGPRAlloc.cpp AMDGPUSwLowerLDS.cpp AMDGPUMachineFunction.cpp AMDGPUMachineModuleInfo.cpp diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h index 2764ed3d3f0b1..5e92921f3ea21 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h +++ 
b/llvm/lib/Target/AMDGPU/SIInstrInfo.h @@ -1113,7 +1113,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo { // that will not require an additional 4-bytes; this function assumes that it // will. bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const { - assert(!MO.isReg() && "isInlineConstant called on register operand!"); if (!MO.isImm()) return false; return isInlineConstant(MO.getImm(), OperandType); diff --git a/llvm/test/CodeGen/AMDGPU/agpr-remat.ll b/llvm/test/CodeGen/AMDGPU/agpr-remat.ll index 6742ae6c1d584..f6465de86fa4f 100644 --- a/llvm/test/CodeGen/AMDGPU/agpr-remat.ll +++ b/llvm/test/CodeGen/AMDGPU/agpr-remat.ll @@ -6,17 +6,17 @@ define amdgpu_kernel void @remat_constant_voids_spill(ptr addrspace(1) %p) #1 { ; GFX908-LABEL: remat_constant_voids_spill: ; GFX908: ; %bb.0: -; GFX908-NEXT: v_accvgpr_write_b32 a1, 1 -; GFX908-NEXT: v_accvgpr_write_b32 a5, 6 -; GFX908-NEXT: v_accvgpr_write_b32 a6, 7 -; GFX908-NEXT: v_accvgpr_write_b32 a7, 8 -; GFX908-NEXT: v_accvgpr_write_b32 a0, 9 -; GFX908-NEXT: v_accvgpr_write_b32 a2, 2 -; GFX908-NEXT: v_accvgpr_write_b32 a3, 3 -; GFX908-NEXT: v_accvgpr_write_b32 a4, 4 +; GFX908-NEXT: v_accvgpr_write_b32 a0, 1 +; GFX908-NEXT: v_accvgpr_write_b32 a1, 2 +; GFX908-NEXT: v_accvgpr_write_b32 a2, 3 +; GFX908-NEXT: v_accvgpr_write_b32 a3, 4 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ;;#ASMEND -; GFX908-NEXT: v_accvgpr_write_b32 a1, 5 +; GFX908-NEXT: v_accvgpr_write_b32 a0, 5 +; GFX908-NEXT: v_accvgpr_write_b32 a1, 6 +; GFX908-NEXT: v_accvgpr_write_b32 a2, 7 +; GFX908-NEXT: v_accvgpr_write_b32 a3, 8 +; GFX908-NEXT: v_accvgpr_write_b32 a4, 9 ; GFX908-NEXT: ;;#ASMSTART ; GFX908-NEXT: ;;#ASMEND ; GFX908-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir new file mode 100644 index 0000000000000..69bdb1f5066f0 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-prepare-agpr-alloc.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions 
have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx90a -run-pass=amdgpu-prepare-agpr-alloc -o - %s | FileCheck -check-prefixes=HAS-AGPR,GFX90A %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=amdgpu-prepare-agpr-alloc -o - %s | FileCheck -check-prefixes=HAS-AGPR,GFX908 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx906 -passes=amdgpu-prepare-agpr-alloc -o - %s | FileCheck -check-prefix=NO-AGPR %s + +--- | + define void @func() { + ret void + } + + ; Attribute is ignored for gfx90a + define void @no_agprs() "amdgpu-agpr-alloc"="0,0" { + ret void + } + +... +--- +name: func +tracksRegLiveness: true +stack: + - { id: 0, size: 4 } +body: | + ; HAS-AGPR-LABEL: name: func + ; HAS-AGPR: bb.0: + ; HAS-AGPR-NEXT: successors: %bb.1(0x80000000) + ; HAS-AGPR-NEXT: liveins: $vgpr0 + ; HAS-AGPR-NEXT: {{ $}} + ; HAS-AGPR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; HAS-AGPR-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; HAS-AGPR-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec + ; HAS-AGPR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65, implicit $exec + ; HAS-AGPR-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; HAS-AGPR-NEXT: [[AV_MOV_1:%[0-9]+]]:agpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec + ; HAS-AGPR-NEXT: [[AV_MOV_2:%[0-9]+]]:agpr_32 = AV_MOV_B32_IMM_PSEUDO 6, implicit $exec + ; HAS-AGPR-NEXT: {{ $}} + ; HAS-AGPR-NEXT: bb.1: + ; HAS-AGPR-NEXT: [[AV_MOV_3:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 3, implicit $exec + ; + ; NO-AGPR-LABEL: name: func + ; NO-AGPR: bb.0: + ; NO-AGPR-NEXT: successors: %bb.1(0x80000000) + ; NO-AGPR-NEXT: liveins: $vgpr0 + ; NO-AGPR-NEXT: {{ $}} + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 $vgpr0, implicit $exec + ; NO-AGPR-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = 
V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65, implicit $exec + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + ; NO-AGPR-NEXT: [[V_ACCVGPR_WRITE_B32_e64_1:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + ; NO-AGPR-NEXT: [[V_ACCVGPR_WRITE_B32_e64_2:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 6, implicit $exec + ; NO-AGPR-NEXT: {{ $}} + ; NO-AGPR-NEXT: bb.1: + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_4:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + bb.0: + liveins: $vgpr0 + %0:vgpr_32 = V_MOV_B32_e32 $vgpr0, implicit $exec + %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 $vgpr0, implicit $exec + %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %3:vgpr_32 = V_MOV_B32_e32 65, implicit $exec + %4:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec + %5:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + %6:agpr_32 = V_ACCVGPR_WRITE_B32_e64 6, implicit $exec + + bb.1: + %7:vgpr_32 = V_MOV_B32_e32 3, implicit $exec + +... 
+ +--- +name: no_agprs +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + ; GFX90A-LABEL: name: no_agprs + ; GFX90A: liveins: $vgpr0 + ; GFX90A-NEXT: {{ $}} + ; GFX90A-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; GFX90A-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + ; + ; GFX908-LABEL: name: no_agprs + ; GFX908: liveins: $vgpr0 + ; GFX908-NEXT: {{ $}} + ; GFX908-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec + ; GFX908-NEXT: [[AV_MOV_1:%[0-9]+]]:agpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec + ; + ; NO-AGPR-LABEL: name: no_agprs + ; NO-AGPR: liveins: $vgpr0 + ; NO-AGPR-NEXT: {{ $}} + ; NO-AGPR-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + ; NO-AGPR-NEXT: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec + %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll index ae90cfb631e8d..7eb7d72e6cb97 100644 --- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll +++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll @@ -25,7 +25,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: S_BITCMP1_B32 renamable $sgpr17, 8, implicit-def $scc ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_CSELECT_B64 -1, 0, implicit killed $scc ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_XOR_B64 killed renamable $sgpr18_sgpr19, -1, implicit-def dead $scc - ; GFX90A-NEXT: renamable $vgpr3 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr3 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3) ; GFX90A-NEXT: renamable $sgpr18_sgpr19 = S_MOV_B64 0 ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr28_sgpr29, implicit-def dead $scc @@ -56,8 +56,8 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr34_sgpr35, implicit-def dead $scc - ; GFX90A-NEXT: renamable $vgpr15 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr17 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr15 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr17 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.57, implicit $vcc ; GFX90A-NEXT: {{ $}} ; GFX90A-NEXT: bb.4.bb15: @@ -112,14 +112,14 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 ; GFX90A-NEXT: successors: %bb.7(0x80000000) ; GFX90A-NEXT: 
liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x000000000000000F, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr64_sgpr65, $sgpr66_sgpr67, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $sgpr24_sgpr25_sgpr26_sgpr27:0x00000000000000F0, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3 ; GFX90A-NEXT: {{ $}} - ; GFX90A-NEXT: renamable $vgpr19 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr18 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr21 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr20 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr23 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr22 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr25 = V_MOV_B32_e32 0, implicit $exec - ; GFX90A-NEXT: renamable $vgpr24 = V_MOV_B32_e32 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr19 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr18 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr21 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + ; GFX90A-NEXT: renamable $vgpr20 = AV_MOV_B32_IMM_PSE... 
[truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/149292 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits