https://github.com/vikramRH updated https://github.com/llvm/llvm-project/pull/174096
>From 10ceffb21660dec8c0b23c7bbb4a0e3aa02cd4f1 Mon Sep 17 00:00:00 2001 From: vikhegde <[email protected]> Date: Wed, 31 Dec 2025 19:38:57 +0530 Subject: [PATCH] [AMDGPU][NPM] Complete fast regalloc pipeline --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 38 +++++++++++++++++++ llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll | 11 +++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index d25b22b2b96dc..f8a83e72bc3ef 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -154,8 +154,10 @@ class AMDGPUCodeGenPassBuilder void addPostRegAlloc(PassManagerWrapper &PMW) const; void addPreEmitPass(PassManagerWrapper &PMWM) const; void addPreEmitRegAlloc(PassManagerWrapper &PMW) const; + Error addRegAssignmentFast(PassManagerWrapper &PMW) const; Error addRegAssignmentOptimized(PassManagerWrapper &PMW) const; void addPreRegAlloc(PassManagerWrapper &PMW) const; + Error addFastRegAlloc(PassManagerWrapper &PMW) const; void addOptimizedRegAlloc(PassManagerWrapper &PMW) const; void addPreSched2(PassManagerWrapper &PMW) const; void addPostBBSections(PassManagerWrapper &PMW) const; @@ -2311,6 +2313,42 @@ void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization( addMachineFunctionPass(SIShrinkInstructionsPass(), PMW); } +Error AMDGPUCodeGenPassBuilder::addFastRegAlloc(PassManagerWrapper &PMW) const { + insertPass<PHIEliminationPass>(SILowerControlFlowPass()); + + insertPass<TwoAddressInstructionPass>(SIWholeQuadModePass()); + + return Base::addFastRegAlloc(PMW); +} + +Error AMDGPUCodeGenPassBuilder::addRegAssignmentFast( + PassManagerWrapper &PMW) const { + // TODO: handle default regalloc override error (with regalloc-npm) + + addMachineFunctionPass(GCNPreRALongBranchRegPass(), PMW); + + addMachineFunctionPass(RegAllocFastPass({onlyAllocateSGPRs, "sgpr", false}), + PMW); + + // Equivalent of PEI for SGPRs. + addMachineFunctionPass(SILowerSGPRSpillsPass(), PMW); + + // To Allocate wwm registers used in whole quad mode operations (for shaders). + addMachineFunctionPass(SIPreAllocateWWMRegsPass(), PMW); + + // For allocating other wwm register operands. + addMachineFunctionPass(RegAllocFastPass({onlyAllocateWWMRegs, "wwm", false}), + PMW); + + addMachineFunctionPass(SILowerWWMCopiesPass(), PMW); + addMachineFunctionPass(AMDGPUReserveWWMRegsPass(), PMW); + + // For allocating per-thread VGPRs. + addMachineFunctionPass(RegAllocFastPass({onlyAllocateVGPRs, "vgpr"}), PMW); + + return Error::success(); +} + void AMDGPUCodeGenPassBuilder::addOptimizedRegAlloc( PassManagerWrapper &PMW) const { if (EnableDCEInRA) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll index ae64d33cc0b43..953f7e1a5e3c7 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll @@ -58,8 +58,17 @@ ; GCN-O0-NEXT:require<reg-usage> ; GCN-O0-NEXT:cgscc(function(machine-function(reg-usage-propagation ; GCN-O0-NEXT:phi-node-elimination +; GCN-O0-NEXT:si-lower-control-flow ; GCN-O0-NEXT:two-address-instruction -; GCN-O0-NEXT:regallocfast +; GCN-O0-NEXT:si-wqm +; GCN-O0-NEXT:amdgpu-pre-ra-long-branch-reg +; GCN-O0-NEXT:regallocfast<filter=sgpr;no-clear-vregs> +; GCN-O0-NEXT:si-lower-sgpr-spills +; GCN-O0-NEXT:si-pre-allocate-wwm-regs +; GCN-O0-NEXT:regallocfast<filter=wwm;no-clear-vregs> +; GCN-O0-NEXT:si-lower-wwm-copies +; GCN-O0-NEXT:amdgpu-reserve-wwm-regs +; GCN-O0-NEXT:regallocfast<filter=vgpr> ; GCN-O0-NEXT:si-fix-vgpr-copies ; GCN-O0-NEXT:remove-redundant-debug-values ; GCN-O0-NEXT:fixup-statepoint-caller-saved _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
