https://github.com/vikramRH updated 
https://github.com/llvm/llvm-project/pull/173487

>From 1ccfb973a55f6dd96bb8b5848737363cb9b21784 Mon Sep 17 00:00:00 2001
From: vikhegde <[email protected]>
Date: Wed, 24 Dec 2025 11:05:18 +0530
Subject: [PATCH] [AMDGPU][NPM] Enable "AMDGPURewriteAGPRCopyMFMAPass"

---
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   2 +
 llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll  | 838 +++++++++---------
 2 files changed, 422 insertions(+), 418 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 50ee013a34684..d25b22b2b96dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2288,6 +2288,8 @@ void AMDGPUCodeGenPassBuilder::addPreRewrite(PassManagerWrapper &PMW) const {
   if (EnableRegReassign) {
     addMachineFunctionPass(GCNNSAReassignPass(), PMW);
   }
+
+  addMachineFunctionPass(AMDGPURewriteAGPRCopyMFMAPass(), PMW);
 }
 
 void AMDGPUCodeGenPassBuilder::addMachineSSAOptimization(
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
index 6305806c48f18..9f8607d1bcd0c 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline-npm.ll
@@ -7,426 +7,428 @@
 ; RUN: llc -O3 -enable-new-pm -mtriple=amdgcn--amdhsa -print-pipeline-passes < %s 2>&1 \
 ; RUN:   | tr ',' '\n' | FileCheck -check-prefix=GCN-O3 %s
 
-; GCN-O0: require<MachineModuleAnalysis>
-; GCN-O0-NEXT: require<profile-summary>
-; GCN-O0-NEXT: require<collector-metadata>
-; GCN-O0-NEXT: require<runtime-libcall-info>
-; GCN-O0-NEXT: pre-isel-intrinsic-lowering
-; GCN-O0-NEXT: function(expand-ir-insts<O0>)
-; GCN-O0-NEXT: amdgpu-remove-incompatible-functions
-; GCN-O0-NEXT: amdgpu-printf-runtime-binding
-; GCN-O0-NEXT: amdgpu-lower-ctor-dtor
-; GCN-O0-NEXT: function(amdgpu-uniform-intrinsic-combine)
-; GCN-O0-NEXT: expand-variadics
-; GCN-O0-NEXT: amdgpu-always-inline
-; GCN-O0-NEXT: always-inline
-; GCN-O0-NEXT: amdgpu-export-kernel-runtime-handles
-; GCN-O0-NEXT: amdgpu-lower-exec-sync
-; GCN-O0-NEXT: amdgpu-sw-lower-lds
-; GCN-O0-NEXT: amdgpu-lower-module-lds
-; GCN-O0-NEXT: function(atomic-expand
-; GCN-O0-NEXT: verify
-; GCN-O0-NEXT: unreachableblockelim
-; GCN-O0-NEXT: ee-instrument<post-inline>
-; GCN-O0-NEXT: scalarize-masked-mem-intrin
-; GCN-O0-NEXT: expand-reductions
-; GCN-O0-NEXT: amdgpu-lower-kernel-arguments)
-; GCN-O0-NEXT: amdgpu-lower-buffer-fat-pointers
-; GCN-O0-NEXT: amdgpu-lower-intrinsics
-; GCN-O0-NEXT: cgscc(function(lower-switch
-; GCN-O0-NEXT: lower-invoke
-; GCN-O0-NEXT: unreachableblockelim))
-; GCN-O0-NEXT: require<amdgpu-argument-usage>
-; GCN-O0-NEXT: cgscc(function(amdgpu-unify-divergent-exit-nodes
-; GCN-O0-NEXT: fix-irreducible
-; GCN-O0-NEXT: unify-loop-exits
-; GCN-O0-NEXT: StructurizeCFGPass
-; GCN-O0-NEXT: amdgpu-annotate-uniform
-; GCN-O0-NEXT: si-annotate-control-flow
-; GCN-O0-NEXT: amdgpu-rewrite-undef-for-phi
-; GCN-O0-NEXT: lcssa
-; GCN-O0-NEXT: require<uniformity>
-; GCN-O0-NEXT: callbr-prepare
-; GCN-O0-NEXT: safe-stack
-; GCN-O0-NEXT: stack-protector
-; GCN-O0-NEXT: verify))
-; GCN-O0-NEXT: cgscc(function(machine-function(amdgpu-isel
-; GCN-O0-NEXT: si-fix-sgpr-copies
-; GCN-O0-NEXT: si-i1-copies
-; GCN-O0-NEXT: finalize-isel
-; GCN-O0-NEXT: localstackalloc)))
-; GCN-O0-NEXT: require<reg-usage>
-; GCN-O0-NEXT: cgscc(function(machine-function(reg-usage-propagation
-; GCN-O0-NEXT: phi-node-elimination
-; GCN-O0-NEXT: two-address-instruction
-; GCN-O0-NEXT: regallocfast
-; GCN-O0-NEXT: si-fix-vgpr-copies
-; GCN-O0-NEXT: remove-redundant-debug-values
-; GCN-O0-NEXT: fixup-statepoint-caller-saved
-; GCN-O0-NEXT: prolog-epilog
-; GCN-O0-NEXT: post-ra-pseudos
-; GCN-O0-NEXT: si-post-ra-bundler
-; GCN-O0-NEXT: fentry-insert
-; GCN-O0-NEXT: xray-instrumentation
-; GCN-O0-NEXT: si-memory-legalizer
-; GCN-O0-NEXT: si-insert-waitcnts
-; GCN-O0-NEXT: si-mode-register
-; GCN-O0-NEXT: si-late-branch-lowering
-; GCN-O0-NEXT: post-RA-hazard-rec
-; GCN-O0-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O0-NEXT: amdgpu-lower-vgpr-encoding
-; GCN-O0-NEXT: branch-relaxation)))
-; GCN-O0-NEXT: require<reg-usage>
-; GCN-O0-NEXT: cgscc(function(machine-function(reg-usage-collector
-; GCN-O0-NEXT: remove-loads-into-fake-uses
-; GCN-O0-NEXT: live-debug-values
-; GCN-O0-NEXT: machine-sanmd
-; GCN-O0-NEXT: amdgpu-preload-kern-arg-prolog
-; GCN-O0-NEXT: stack-frame-layout
-; GCN-O0-NEXT: verify)
-; GCN-O0-NEXT: free-machine-function))
+; GCN-O0:require<MachineModuleAnalysis>
+; GCN-O0-NEXT:require<profile-summary>
+; GCN-O0-NEXT:require<collector-metadata>
+; GCN-O0-NEXT:require<runtime-libcall-info>
+; GCN-O0-NEXT:pre-isel-intrinsic-lowering
+; GCN-O0-NEXT:function(expand-ir-insts<O0>)
+; GCN-O0-NEXT:amdgpu-remove-incompatible-functions
+; GCN-O0-NEXT:amdgpu-printf-runtime-binding
+; GCN-O0-NEXT:amdgpu-lower-ctor-dtor
+; GCN-O0-NEXT:function(amdgpu-uniform-intrinsic-combine)
+; GCN-O0-NEXT:expand-variadics
+; GCN-O0-NEXT:amdgpu-always-inline
+; GCN-O0-NEXT:always-inline
+; GCN-O0-NEXT:amdgpu-export-kernel-runtime-handles
+; GCN-O0-NEXT:amdgpu-lower-exec-sync
+; GCN-O0-NEXT:amdgpu-sw-lower-lds
+; GCN-O0-NEXT:amdgpu-lower-module-lds
+; GCN-O0-NEXT:function(atomic-expand
+; GCN-O0-NEXT:verify
+; GCN-O0-NEXT:unreachableblockelim
+; GCN-O0-NEXT:ee-instrument<post-inline>
+; GCN-O0-NEXT:scalarize-masked-mem-intrin
+; GCN-O0-NEXT:expand-reductions
+; GCN-O0-NEXT:amdgpu-lower-kernel-arguments)
+; GCN-O0-NEXT:amdgpu-lower-buffer-fat-pointers
+; GCN-O0-NEXT:amdgpu-lower-intrinsics
+; GCN-O0-NEXT:cgscc(function(lower-switch
+; GCN-O0-NEXT:lower-invoke
+; GCN-O0-NEXT:unreachableblockelim))
+; GCN-O0-NEXT:require<amdgpu-argument-usage>
+; GCN-O0-NEXT:cgscc(function(amdgpu-unify-divergent-exit-nodes
+; GCN-O0-NEXT:fix-irreducible
+; GCN-O0-NEXT:unify-loop-exits
+; GCN-O0-NEXT:StructurizeCFGPass
+; GCN-O0-NEXT:amdgpu-annotate-uniform
+; GCN-O0-NEXT:si-annotate-control-flow
+; GCN-O0-NEXT:amdgpu-rewrite-undef-for-phi
+; GCN-O0-NEXT:lcssa
+; GCN-O0-NEXT:require<uniformity>
+; GCN-O0-NEXT:callbr-prepare
+; GCN-O0-NEXT:safe-stack
+; GCN-O0-NEXT:stack-protector
+; GCN-O0-NEXT:verify))
+; GCN-O0-NEXT:cgscc(function(machine-function(amdgpu-isel
+; GCN-O0-NEXT:si-fix-sgpr-copies
+; GCN-O0-NEXT:si-i1-copies
+; GCN-O0-NEXT:finalize-isel
+; GCN-O0-NEXT:localstackalloc)))
+; GCN-O0-NEXT:require<reg-usage>
+; GCN-O0-NEXT:cgscc(function(machine-function(reg-usage-propagation
+; GCN-O0-NEXT:phi-node-elimination
+; GCN-O0-NEXT:two-address-instruction
+; GCN-O0-NEXT:regallocfast
+; GCN-O0-NEXT:si-fix-vgpr-copies
+; GCN-O0-NEXT:remove-redundant-debug-values
+; GCN-O0-NEXT:fixup-statepoint-caller-saved
+; GCN-O0-NEXT:prolog-epilog
+; GCN-O0-NEXT:post-ra-pseudos
+; GCN-O0-NEXT:si-post-ra-bundler
+; GCN-O0-NEXT:fentry-insert
+; GCN-O0-NEXT:xray-instrumentation
+; GCN-O0-NEXT:si-memory-legalizer
+; GCN-O0-NEXT:si-insert-waitcnts
+; GCN-O0-NEXT:si-mode-register
+; GCN-O0-NEXT:si-late-branch-lowering
+; GCN-O0-NEXT:post-RA-hazard-rec
+; GCN-O0-NEXT:amdgpu-wait-sgpr-hazards
+; GCN-O0-NEXT:amdgpu-lower-vgpr-encoding
+; GCN-O0-NEXT:branch-relaxation)))
+; GCN-O0-NEXT:require<reg-usage>
+; GCN-O0-NEXT:cgscc(function(machine-function(reg-usage-collector
+; GCN-O0-NEXT:remove-loads-into-fake-uses
+; GCN-O0-NEXT:live-debug-values
+; GCN-O0-NEXT:machine-sanmd
+; GCN-O0-NEXT:amdgpu-preload-kern-arg-prolog
+; GCN-O0-NEXT:stack-frame-layout
+; GCN-O0-NEXT:verify)
+; GCN-O0-NEXT:free-machine-function))
 
-; GCN-O2: require<MachineModuleAnalysis>
-; GCN-O2-NEXT: require<profile-summary>
-; GCN-O2-NEXT: require<collector-metadata>
-; GCN-O2-NEXT: require<runtime-libcall-info>
-; GCN-O2-NEXT: pre-isel-intrinsic-lowering
-; GCN-O2-NEXT: function(expand-ir-insts<O2>)
-; GCN-O2-NEXT: amdgpu-remove-incompatible-functions
-; GCN-O2-NEXT: amdgpu-printf-runtime-binding
-; GCN-O2-NEXT: amdgpu-lower-ctor-dtor
-; GCN-O2-NEXT: function(amdgpu-image-intrinsic-opt
-; GCN-O2-NEXT: amdgpu-uniform-intrinsic-combine)
-; GCN-O2-NEXT: expand-variadics
-; GCN-O2-NEXT: amdgpu-always-inline
-; GCN-O2-NEXT: always-inline
-; GCN-O2-NEXT: amdgpu-export-kernel-runtime-handles
-; GCN-O2-NEXT: amdgpu-lower-exec-sync
-; GCN-O2-NEXT: amdgpu-sw-lower-lds
-; GCN-O2-NEXT: amdgpu-lower-module-lds
-; GCN-O2-NEXT: function(amdgpu-atomic-optimizer
-; GCN-O2-NEXT: atomic-expand
-; GCN-O2-NEXT: amdgpu-promote-alloca
-; GCN-O2-NEXT: separate-const-offset-from-gep<>
-; GCN-O2-NEXT: slsr
-; GCN-O2-NEXT: early-cse<>
-; GCN-O2-NEXT: nary-reassociate
-; GCN-O2-NEXT: early-cse<>
-; GCN-O2-NEXT: amdgpu-codegenprepare
-; GCN-O2-NEXT: loop-mssa(licm<allowspeculation>)
-; GCN-O2-NEXT: verify
-; GCN-O2-NEXT: loop-mssa(canon-freeze
-; GCN-O2-NEXT: loop-reduce)
-; GCN-O2-NEXT: mergeicmps
-; GCN-O2-NEXT: expand-memcmp
-; GCN-O2-NEXT: unreachableblockelim
-; GCN-O2-NEXT: consthoist
-; GCN-O2-NEXT: replace-with-veclib
-; GCN-O2-NEXT: partially-inline-libcalls
-; GCN-O2-NEXT: ee-instrument<post-inline>
-; GCN-O2-NEXT: scalarize-masked-mem-intrin
-; GCN-O2-NEXT: expand-reductions
-; GCN-O2-NEXT: early-cse<>)
-; GCN-O2-NEXT: amdgpu-preload-kernel-arguments
-; GCN-O2-NEXT: function(amdgpu-lower-kernel-arguments
-; GCN-O2-NEXT: codegenprepare
-; GCN-O2-NEXT: load-store-vectorizer)
-; GCN-O2-NEXT: amdgpu-lower-buffer-fat-pointers
-; GCN-O2-NEXT: amdgpu-lower-intrinsics
-; GCN-O2-NEXT: cgscc(function(lower-switch
-; GCN-O2-NEXT: lower-invoke
-; GCN-O2-NEXT: unreachableblockelim))
-; GCN-O2-NEXT: require<amdgpu-argument-usage>
-; GCN-O2-NEXT: cgscc(function(flatten-cfg
-; GCN-O2-NEXT: sink
-; GCN-O2-NEXT: amdgpu-late-codegenprepare
-; GCN-O2-NEXT: amdgpu-unify-divergent-exit-nodes
-; GCN-O2-NEXT: fix-irreducible
-; GCN-O2-NEXT: unify-loop-exits
-; GCN-O2-NEXT: StructurizeCFGPass
-; GCN-O2-NEXT: amdgpu-annotate-uniform
-; GCN-O2-NEXT: si-annotate-control-flow
-; GCN-O2-NEXT: amdgpu-rewrite-undef-for-phi
-; GCN-O2-NEXT: lcssa))
-; GCN-O2-NEXT: amdgpu-perf-hint
-; GCN-O2-NEXT: cgscc(function(require<uniformity>
-; GCN-O2-NEXT: objc-arc-contract
-; GCN-O2-NEXT: callbr-prepare
-; GCN-O2-NEXT: safe-stack
-; GCN-O2-NEXT: stack-protector
-; GCN-O2-NEXT: verify))
-; GCN-O2-NEXT: cgscc(function(machine-function(amdgpu-isel
-; GCN-O2-NEXT: si-fix-sgpr-copies
-; GCN-O2-NEXT: si-i1-copies
-; GCN-O2-NEXT: finalize-isel
-; GCN-O2-NEXT: early-tailduplication
-; GCN-O2-NEXT: opt-phis
-; GCN-O2-NEXT: stack-coloring
-; GCN-O2-NEXT: localstackalloc
-; GCN-O2-NEXT: dead-mi-elimination
-; GCN-O2-NEXT: early-machinelicm
-; GCN-O2-NEXT: machine-cse
-; GCN-O2-NEXT: machine-sink
-; GCN-O2-NEXT: peephole-opt
-; GCN-O2-NEXT: dead-mi-elimination
-; GCN-O2-NEXT: si-fold-operands
-; GCN-O2-NEXT: gcn-dpp-combine
-; GCN-O2-NEXT: si-load-store-opt
-; GCN-O2-NEXT: si-peephole-sdwa
-; GCN-O2-NEXT: early-machinelicm
-; GCN-O2-NEXT: machine-cse
-; GCN-O2-NEXT: si-fold-operands
-; GCN-O2-NEXT: dead-mi-elimination
-; GCN-O2-NEXT: si-shrink-instructions)))
-; GCN-O2-NEXT: require<reg-usage>
-; GCN-O2-NEXT: cgscc(function(machine-function(reg-usage-propagation
-; GCN-O2-NEXT: amdgpu-prepare-agpr-alloc
-; GCN-O2-NEXT: detect-dead-lanes
-; GCN-O2-NEXT: dead-mi-elimination
-; GCN-O2-NEXT: init-undef
-; GCN-O2-NEXT: process-imp-defs
-; GCN-O2-NEXT: unreachable-mbb-elimination
-; GCN-O2-NEXT: require<live-vars>
-; GCN-O2-NEXT: si-opt-vgpr-liverange
-; GCN-O2-NEXT: require<machine-loops>
-; GCN-O2-NEXT: phi-node-elimination
-; GCN-O2-NEXT: si-lower-control-flow
-; GCN-O2-NEXT: two-address-instruction
-; GCN-O2-NEXT: register-coalescer
-; GCN-O2-NEXT: rename-independent-subregs
-; GCN-O2-NEXT: amdgpu-rewrite-partial-reg-uses
-; GCN-O2-NEXT: machine-scheduler
-; GCN-O2-NEXT: amdgpu-pre-ra-optimizations
-; GCN-O2-NEXT: si-wqm
-; GCN-O2-NEXT: si-optimize-exec-masking-pre-ra
-; GCN-O2-NEXT: si-form-memory-clauses
-; GCN-O2-NEXT: amdgpu-pre-ra-long-branch-reg
-; GCN-O2-NEXT: greedy<sgpr>
-; GCN-O2-NEXT: virt-reg-rewriter<no-clear-vregs>
-; GCN-O2-NEXT: stack-slot-coloring
-; GCN-O2-NEXT: si-lower-sgpr-spills
-; GCN-O2-NEXT: si-pre-allocate-wwm-regs
-; GCN-O2-NEXT: greedy<wwm>
-; GCN-O2-NEXT: si-lower-wwm-copies
-; GCN-O2-NEXT: virt-reg-rewriter<no-clear-vregs>
-; GCN-O2-NEXT: amdgpu-reserve-wwm-regs
-; GCN-O2-NEXT: greedy<vgpr>
-; GCN-O2-NEXT: amdgpu-nsa-reassign
-; GCN-O2-NEXT: virt-reg-rewriter
-; GCN-O2-NEXT: amdgpu-mark-last-scratch-load
-; GCN-O2-NEXT: stack-slot-coloring
-; GCN-O2-NEXT: machine-cp
-; GCN-O2-NEXT: machinelicm
-; GCN-O2-NEXT: si-fix-vgpr-copies
-; GCN-O2-NEXT: si-optimize-exec-masking
-; GCN-O2-NEXT: remove-redundant-debug-values
-; GCN-O2-NEXT: fixup-statepoint-caller-saved
-; GCN-O2-NEXT: postra-machine-sink
-; GCN-O2-NEXT: shrink-wrap
-; GCN-O2-NEXT: prolog-epilog
-; GCN-O2-NEXT: machine-latecleanup
-; GCN-O2-NEXT: branch-folder
-; GCN-O2-NEXT: tailduplication
-; GCN-O2-NEXT: machine-cp
-; GCN-O2-NEXT: post-ra-pseudos
-; GCN-O2-NEXT: si-shrink-instructions
-; GCN-O2-NEXT: si-post-ra-bundler
-; GCN-O2-NEXT: postmisched
-; GCN-O2-NEXT: block-placement
-; GCN-O2-NEXT: fentry-insert
-; GCN-O2-NEXT: xray-instrumentation
-; GCN-O2-NEXT: gcn-create-vopd
-; GCN-O2-NEXT: si-memory-legalizer
-; GCN-O2-NEXT: si-insert-waitcnts
-; GCN-O2-NEXT: si-mode-register
-; GCN-O2-NEXT: si-insert-hard-clauses
-; GCN-O2-NEXT: si-late-branch-lowering
-; GCN-O2-NEXT: si-pre-emit-peephole
-; GCN-O2-NEXT: post-RA-hazard-rec
-; GCN-O2-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O2-NEXT: amdgpu-lower-vgpr-encoding
-; GCN-O2-NEXT: amdgpu-insert-delay-alu
-; GCN-O2-NEXT: branch-relaxation)))
-; GCN-O2-NEXT: require<reg-usage>
-; GCN-O2-NEXT: cgscc(function(machine-function(reg-usage-collector
-; GCN-O2-NEXT: remove-loads-into-fake-uses
-; GCN-O2-NEXT: live-debug-values
-; GCN-O2-NEXT: machine-sanmd
-; GCN-O2-NEXT: amdgpu-preload-kern-arg-prolog
-; GCN-O2-NEXT: stack-frame-layout
-; GCN-O2-NEXT: verify)
-; GCN-O2-NEXT: free-machine-function))
+; GCN-O2:require<MachineModuleAnalysis>
+; GCN-O2-NEXT:require<profile-summary>
+; GCN-O2-NEXT:require<collector-metadata>
+; GCN-O2-NEXT:require<runtime-libcall-info>
+; GCN-O2-NEXT:pre-isel-intrinsic-lowering
+; GCN-O2-NEXT:function(expand-ir-insts<O2>)
+; GCN-O2-NEXT:amdgpu-remove-incompatible-functions
+; GCN-O2-NEXT:amdgpu-printf-runtime-binding
+; GCN-O2-NEXT:amdgpu-lower-ctor-dtor
+; GCN-O2-NEXT:function(amdgpu-image-intrinsic-opt
+; GCN-O2-NEXT:amdgpu-uniform-intrinsic-combine)
+; GCN-O2-NEXT:expand-variadics
+; GCN-O2-NEXT:amdgpu-always-inline
+; GCN-O2-NEXT:always-inline
+; GCN-O2-NEXT:amdgpu-export-kernel-runtime-handles
+; GCN-O2-NEXT:amdgpu-lower-exec-sync
+; GCN-O2-NEXT:amdgpu-sw-lower-lds
+; GCN-O2-NEXT:amdgpu-lower-module-lds
+; GCN-O2-NEXT:function(amdgpu-atomic-optimizer
+; GCN-O2-NEXT:atomic-expand
+; GCN-O2-NEXT:amdgpu-promote-alloca
+; GCN-O2-NEXT:separate-const-offset-from-gep<>
+; GCN-O2-NEXT:slsr
+; GCN-O2-NEXT:early-cse<>
+; GCN-O2-NEXT:nary-reassociate
+; GCN-O2-NEXT:early-cse<>
+; GCN-O2-NEXT:amdgpu-codegenprepare
+; GCN-O2-NEXT:loop-mssa(licm<allowspeculation>)
+; GCN-O2-NEXT:verify
+; GCN-O2-NEXT:loop-mssa(canon-freeze
+; GCN-O2-NEXT:loop-reduce)
+; GCN-O2-NEXT:mergeicmps
+; GCN-O2-NEXT:expand-memcmp
+; GCN-O2-NEXT:unreachableblockelim
+; GCN-O2-NEXT:consthoist
+; GCN-O2-NEXT:replace-with-veclib
+; GCN-O2-NEXT:partially-inline-libcalls
+; GCN-O2-NEXT:ee-instrument<post-inline>
+; GCN-O2-NEXT:scalarize-masked-mem-intrin
+; GCN-O2-NEXT:expand-reductions
+; GCN-O2-NEXT:early-cse<>)
+; GCN-O2-NEXT:amdgpu-preload-kernel-arguments
+; GCN-O2-NEXT:function(amdgpu-lower-kernel-arguments
+; GCN-O2-NEXT:codegenprepare
+; GCN-O2-NEXT:load-store-vectorizer)
+; GCN-O2-NEXT:amdgpu-lower-buffer-fat-pointers
+; GCN-O2-NEXT:amdgpu-lower-intrinsics
+; GCN-O2-NEXT:cgscc(function(lower-switch
+; GCN-O2-NEXT:lower-invoke
+; GCN-O2-NEXT:unreachableblockelim))
+; GCN-O2-NEXT:require<amdgpu-argument-usage>
+; GCN-O2-NEXT:cgscc(function(flatten-cfg
+; GCN-O2-NEXT:sink
+; GCN-O2-NEXT:amdgpu-late-codegenprepare
+; GCN-O2-NEXT:amdgpu-unify-divergent-exit-nodes
+; GCN-O2-NEXT:fix-irreducible
+; GCN-O2-NEXT:unify-loop-exits
+; GCN-O2-NEXT:StructurizeCFGPass
+; GCN-O2-NEXT:amdgpu-annotate-uniform
+; GCN-O2-NEXT:si-annotate-control-flow
+; GCN-O2-NEXT:amdgpu-rewrite-undef-for-phi
+; GCN-O2-NEXT:lcssa))
+; GCN-O2-NEXT:amdgpu-perf-hint
+; GCN-O2-NEXT:cgscc(function(require<uniformity>
+; GCN-O2-NEXT:objc-arc-contract
+; GCN-O2-NEXT:callbr-prepare
+; GCN-O2-NEXT:safe-stack
+; GCN-O2-NEXT:stack-protector
+; GCN-O2-NEXT:verify))
+; GCN-O2-NEXT:cgscc(function(machine-function(amdgpu-isel
+; GCN-O2-NEXT:si-fix-sgpr-copies
+; GCN-O2-NEXT:si-i1-copies
+; GCN-O2-NEXT:finalize-isel
+; GCN-O2-NEXT:early-tailduplication
+; GCN-O2-NEXT:opt-phis
+; GCN-O2-NEXT:stack-coloring
+; GCN-O2-NEXT:localstackalloc
+; GCN-O2-NEXT:dead-mi-elimination
+; GCN-O2-NEXT:early-machinelicm
+; GCN-O2-NEXT:machine-cse
+; GCN-O2-NEXT:machine-sink
+; GCN-O2-NEXT:peephole-opt
+; GCN-O2-NEXT:dead-mi-elimination
+; GCN-O2-NEXT:si-fold-operands
+; GCN-O2-NEXT:gcn-dpp-combine
+; GCN-O2-NEXT:si-load-store-opt
+; GCN-O2-NEXT:si-peephole-sdwa
+; GCN-O2-NEXT:early-machinelicm
+; GCN-O2-NEXT:machine-cse
+; GCN-O2-NEXT:si-fold-operands
+; GCN-O2-NEXT:dead-mi-elimination
+; GCN-O2-NEXT:si-shrink-instructions)))
+; GCN-O2-NEXT:require<reg-usage>
+; GCN-O2-NEXT:cgscc(function(machine-function(reg-usage-propagation
+; GCN-O2-NEXT:amdgpu-prepare-agpr-alloc
+; GCN-O2-NEXT:detect-dead-lanes
+; GCN-O2-NEXT:dead-mi-elimination
+; GCN-O2-NEXT:init-undef
+; GCN-O2-NEXT:process-imp-defs
+; GCN-O2-NEXT:unreachable-mbb-elimination
+; GCN-O2-NEXT:require<live-vars>
+; GCN-O2-NEXT:si-opt-vgpr-liverange
+; GCN-O2-NEXT:require<machine-loops>
+; GCN-O2-NEXT:phi-node-elimination
+; GCN-O2-NEXT:si-lower-control-flow
+; GCN-O2-NEXT:two-address-instruction
+; GCN-O2-NEXT:register-coalescer
+; GCN-O2-NEXT:rename-independent-subregs
+; GCN-O2-NEXT:amdgpu-rewrite-partial-reg-uses
+; GCN-O2-NEXT:machine-scheduler
+; GCN-O2-NEXT:amdgpu-pre-ra-optimizations
+; GCN-O2-NEXT:si-wqm
+; GCN-O2-NEXT:si-optimize-exec-masking-pre-ra
+; GCN-O2-NEXT:si-form-memory-clauses
+; GCN-O2-NEXT:amdgpu-pre-ra-long-branch-reg
+; GCN-O2-NEXT:greedy<sgpr>
+; GCN-O2-NEXT:virt-reg-rewriter<no-clear-vregs>
+; GCN-O2-NEXT:stack-slot-coloring
+; GCN-O2-NEXT:si-lower-sgpr-spills
+; GCN-O2-NEXT:si-pre-allocate-wwm-regs
+; GCN-O2-NEXT:greedy<wwm>
+; GCN-O2-NEXT:si-lower-wwm-copies
+; GCN-O2-NEXT:virt-reg-rewriter<no-clear-vregs>
+; GCN-O2-NEXT:amdgpu-reserve-wwm-regs
+; GCN-O2-NEXT:greedy<vgpr>
+; GCN-O2-NEXT:amdgpu-nsa-reassign
+; GCN-O2-NEXT:amdgpu-rewrite-agpr-copy-mfma
+; GCN-O2-NEXT:virt-reg-rewriter
+; GCN-O2-NEXT:amdgpu-mark-last-scratch-load
+; GCN-O2-NEXT:stack-slot-coloring
+; GCN-O2-NEXT:machine-cp
+; GCN-O2-NEXT:machinelicm
+; GCN-O2-NEXT:si-fix-vgpr-copies
+; GCN-O2-NEXT:si-optimize-exec-masking
+; GCN-O2-NEXT:remove-redundant-debug-values
+; GCN-O2-NEXT:fixup-statepoint-caller-saved
+; GCN-O2-NEXT:postra-machine-sink
+; GCN-O2-NEXT:shrink-wrap
+; GCN-O2-NEXT:prolog-epilog
+; GCN-O2-NEXT:machine-latecleanup
+; GCN-O2-NEXT:branch-folder
+; GCN-O2-NEXT:tailduplication
+; GCN-O2-NEXT:machine-cp
+; GCN-O2-NEXT:post-ra-pseudos
+; GCN-O2-NEXT:si-shrink-instructions
+; GCN-O2-NEXT:si-post-ra-bundler
+; GCN-O2-NEXT:postmisched
+; GCN-O2-NEXT:block-placement
+; GCN-O2-NEXT:fentry-insert
+; GCN-O2-NEXT:xray-instrumentation
+; GCN-O2-NEXT:gcn-create-vopd
+; GCN-O2-NEXT:si-memory-legalizer
+; GCN-O2-NEXT:si-insert-waitcnts
+; GCN-O2-NEXT:si-mode-register
+; GCN-O2-NEXT:si-insert-hard-clauses
+; GCN-O2-NEXT:si-late-branch-lowering
+; GCN-O2-NEXT:si-pre-emit-peephole
+; GCN-O2-NEXT:post-RA-hazard-rec
+; GCN-O2-NEXT:amdgpu-wait-sgpr-hazards
+; GCN-O2-NEXT:amdgpu-lower-vgpr-encoding
+; GCN-O2-NEXT:amdgpu-insert-delay-alu
+; GCN-O2-NEXT:branch-relaxation)))
+; GCN-O2-NEXT:require<reg-usage>
+; GCN-O2-NEXT:cgscc(function(machine-function(reg-usage-collector
+; GCN-O2-NEXT:remove-loads-into-fake-uses
+; GCN-O2-NEXT:live-debug-values
+; GCN-O2-NEXT:machine-sanmd
+; GCN-O2-NEXT:amdgpu-preload-kern-arg-prolog
+; GCN-O2-NEXT:stack-frame-layout
+; GCN-O2-NEXT:verify)
+; GCN-O2-NEXT:free-machine-function))
 
-; GCN-O3: require<MachineModuleAnalysis>
-; GCN-O3-NEXT: require<profile-summary>
-; GCN-O3-NEXT: require<collector-metadata>
-; GCN-O3-NEXT: require<runtime-libcall-info>
-; GCN-O3-NEXT: pre-isel-intrinsic-lowering
-; GCN-O3-NEXT: function(expand-ir-insts<O3>)
-; GCN-O3-NEXT: amdgpu-remove-incompatible-functions
-; GCN-O3-NEXT: amdgpu-printf-runtime-binding
-; GCN-O3-NEXT: amdgpu-lower-ctor-dtor
-; GCN-O3-NEXT: function(amdgpu-image-intrinsic-opt
-; GCN-O3-NEXT: amdgpu-uniform-intrinsic-combine)
-; GCN-O3-NEXT: expand-variadics
-; GCN-O3-NEXT: amdgpu-always-inline
-; GCN-O3-NEXT: always-inline
-; GCN-O3-NEXT: amdgpu-export-kernel-runtime-handles
-; GCN-O3-NEXT: amdgpu-lower-exec-sync
-; GCN-O3-NEXT: amdgpu-sw-lower-lds
-; GCN-O3-NEXT: amdgpu-lower-module-lds
-; GCN-O3-NEXT: function(amdgpu-atomic-optimizer
-; GCN-O3-NEXT: atomic-expand
-; GCN-O3-NEXT: amdgpu-promote-alloca
-; GCN-O3-NEXT: separate-const-offset-from-gep<>
-; GCN-O3-NEXT: slsr
-; GCN-O3-NEXT: gvn<>
-; GCN-O3-NEXT: nary-reassociate
-; GCN-O3-NEXT: early-cse<>
-; GCN-O3-NEXT: amdgpu-codegenprepare
-; GCN-O3-NEXT: loop-mssa(licm<allowspeculation>)
-; GCN-O3-NEXT: verify
-; GCN-O3-NEXT: loop-mssa(canon-freeze
-; GCN-O3-NEXT: loop-reduce)
-; GCN-O3-NEXT: mergeicmps
-; GCN-O3-NEXT: expand-memcmp
-; GCN-O3-NEXT: unreachableblockelim
-; GCN-O3-NEXT: consthoist
-; GCN-O3-NEXT: replace-with-veclib
-; GCN-O3-NEXT: partially-inline-libcalls
-; GCN-O3-NEXT: ee-instrument<post-inline>
-; GCN-O3-NEXT: scalarize-masked-mem-intrin
-; GCN-O3-NEXT: expand-reductions
-; GCN-O3-NEXT: gvn<>)
-; GCN-O3-NEXT: amdgpu-preload-kernel-arguments
-; GCN-O3-NEXT: function(amdgpu-lower-kernel-arguments
-; GCN-O3-NEXT: codegenprepare
-; GCN-O3-NEXT: load-store-vectorizer)
-; GCN-O3-NEXT: amdgpu-lower-buffer-fat-pointers
-; GCN-O3-NEXT: amdgpu-lower-intrinsics
-; GCN-O3-NEXT: cgscc(function(lower-switch
-; GCN-O3-NEXT: lower-invoke
-; GCN-O3-NEXT: unreachableblockelim))
-; GCN-O3-NEXT: require<amdgpu-argument-usage>
-; GCN-O3-NEXT: cgscc(function(flatten-cfg
-; GCN-O3-NEXT: sink
-; GCN-O3-NEXT: amdgpu-late-codegenprepare
-; GCN-O3-NEXT: amdgpu-unify-divergent-exit-nodes
-; GCN-O3-NEXT: fix-irreducible
-; GCN-O3-NEXT: unify-loop-exits
-; GCN-O3-NEXT: StructurizeCFGPass
-; GCN-O3-NEXT: amdgpu-annotate-uniform
-; GCN-O3-NEXT: si-annotate-control-flow
-; GCN-O3-NEXT: amdgpu-rewrite-undef-for-phi
-; GCN-O3-NEXT: lcssa))
-; GCN-O3-NEXT: amdgpu-perf-hint
-; GCN-O3-NEXT: cgscc(function(require<uniformity>
-; GCN-O3-NEXT: objc-arc-contract
-; GCN-O3-NEXT: callbr-prepare
-; GCN-O3-NEXT: safe-stack
-; GCN-O3-NEXT: stack-protector
-; GCN-O3-NEXT: verify))
-; GCN-O3-NEXT: cgscc(function(machine-function(amdgpu-isel
-; GCN-O3-NEXT: si-fix-sgpr-copies
-; GCN-O3-NEXT: si-i1-copies
-; GCN-O3-NEXT: finalize-isel
-; GCN-O3-NEXT: early-tailduplication
-; GCN-O3-NEXT: opt-phis
-; GCN-O3-NEXT: stack-coloring
-; GCN-O3-NEXT: localstackalloc
-; GCN-O3-NEXT: dead-mi-elimination
-; GCN-O3-NEXT: early-machinelicm
-; GCN-O3-NEXT: machine-cse
-; GCN-O3-NEXT: machine-sink
-; GCN-O3-NEXT: peephole-opt
-; GCN-O3-NEXT: dead-mi-elimination
-; GCN-O3-NEXT: si-fold-operands
-; GCN-O3-NEXT: gcn-dpp-combine
-; GCN-O3-NEXT: si-load-store-opt
-; GCN-O3-NEXT: si-peephole-sdwa
-; GCN-O3-NEXT: early-machinelicm
-; GCN-O3-NEXT: machine-cse
-; GCN-O3-NEXT: si-fold-operands
-; GCN-O3-NEXT: dead-mi-elimination
-; GCN-O3-NEXT: si-shrink-instructions)))
-; GCN-O3-NEXT: require<reg-usage>
-; GCN-O3-NEXT: cgscc(function(machine-function(reg-usage-propagation
-; GCN-O3-NEXT: amdgpu-prepare-agpr-alloc
-; GCN-O3-NEXT: detect-dead-lanes
-; GCN-O3-NEXT: dead-mi-elimination
-; GCN-O3-NEXT: init-undef
-; GCN-O3-NEXT: process-imp-defs
-; GCN-O3-NEXT: unreachable-mbb-elimination
-; GCN-O3-NEXT: require<live-vars>
-; GCN-O3-NEXT: si-opt-vgpr-liverange
-; GCN-O3-NEXT: require<machine-loops>
-; GCN-O3-NEXT: phi-node-elimination
-; GCN-O3-NEXT: si-lower-control-flow
-; GCN-O3-NEXT: two-address-instruction
-; GCN-O3-NEXT: register-coalescer
-; GCN-O3-NEXT: rename-independent-subregs
-; GCN-O3-NEXT: amdgpu-rewrite-partial-reg-uses
-; GCN-O3-NEXT: machine-scheduler
-; GCN-O3-NEXT: amdgpu-pre-ra-optimizations
-; GCN-O3-NEXT: si-wqm
-; GCN-O3-NEXT: si-optimize-exec-masking-pre-ra
-; GCN-O3-NEXT: si-form-memory-clauses
-; GCN-O3-NEXT: amdgpu-pre-ra-long-branch-reg
-; GCN-O3-NEXT: greedy<sgpr>
-; GCN-O3-NEXT: virt-reg-rewriter<no-clear-vregs>
-; GCN-O3-NEXT: stack-slot-coloring
-; GCN-O3-NEXT: si-lower-sgpr-spills
-; GCN-O3-NEXT: si-pre-allocate-wwm-regs
-; GCN-O3-NEXT: greedy<wwm>
-; GCN-O3-NEXT: si-lower-wwm-copies
-; GCN-O3-NEXT: virt-reg-rewriter<no-clear-vregs>
-; GCN-O3-NEXT: amdgpu-reserve-wwm-regs
-; GCN-O3-NEXT: greedy<vgpr>
-; GCN-O3-NEXT: amdgpu-nsa-reassign
-; GCN-O3-NEXT: virt-reg-rewriter
-; GCN-O3-NEXT: amdgpu-mark-last-scratch-load
-; GCN-O3-NEXT: stack-slot-coloring
-; GCN-O3-NEXT: machine-cp
-; GCN-O3-NEXT: machinelicm
-; GCN-O3-NEXT: si-fix-vgpr-copies
-; GCN-O3-NEXT: si-optimize-exec-masking
-; GCN-O3-NEXT: remove-redundant-debug-values
-; GCN-O3-NEXT: fixup-statepoint-caller-saved
-; GCN-O3-NEXT: postra-machine-sink
-; GCN-O3-NEXT: shrink-wrap
-; GCN-O3-NEXT: prolog-epilog
-; GCN-O3-NEXT: machine-latecleanup
-; GCN-O3-NEXT: branch-folder
-; GCN-O3-NEXT: tailduplication
-; GCN-O3-NEXT: machine-cp
-; GCN-O3-NEXT: post-ra-pseudos
-; GCN-O3-NEXT: si-shrink-instructions
-; GCN-O3-NEXT: si-post-ra-bundler
-; GCN-O3-NEXT: postmisched
-; GCN-O3-NEXT: block-placement
-; GCN-O3-NEXT: fentry-insert
-; GCN-O3-NEXT: xray-instrumentation
-; GCN-O3-NEXT: gcn-create-vopd
-; GCN-O3-NEXT: si-memory-legalizer
-; GCN-O3-NEXT: si-insert-waitcnts
-; GCN-O3-NEXT: si-mode-register
-; GCN-O3-NEXT: si-insert-hard-clauses
-; GCN-O3-NEXT: si-late-branch-lowering
-; GCN-O3-NEXT: si-pre-emit-peephole
-; GCN-O3-NEXT: post-RA-hazard-rec
-; GCN-O3-NEXT: amdgpu-wait-sgpr-hazards
-; GCN-O3-NEXT: amdgpu-lower-vgpr-encoding
-; GCN-O3-NEXT: amdgpu-insert-delay-alu
-; GCN-O3-NEXT: branch-relaxation)))
-; GCN-O3-NEXT: require<reg-usage>
-; GCN-O3-NEXT: cgscc(function(machine-function(reg-usage-collector
-; GCN-O3-NEXT: remove-loads-into-fake-uses
-; GCN-O3-NEXT: live-debug-values
-; GCN-O3-NEXT: machine-sanmd
-; GCN-O3-NEXT: amdgpu-preload-kern-arg-prolog
-; GCN-O3-NEXT: stack-frame-layout
-; GCN-O3-NEXT: verify)
-; GCN-O3-NEXT: free-machine-function))
+; GCN-O3:require<MachineModuleAnalysis>
+; GCN-O3-NEXT:require<profile-summary>
+; GCN-O3-NEXT:require<collector-metadata>
+; GCN-O3-NEXT:require<runtime-libcall-info>
+; GCN-O3-NEXT:pre-isel-intrinsic-lowering
+; GCN-O3-NEXT:function(expand-ir-insts<O3>)
+; GCN-O3-NEXT:amdgpu-remove-incompatible-functions
+; GCN-O3-NEXT:amdgpu-printf-runtime-binding
+; GCN-O3-NEXT:amdgpu-lower-ctor-dtor
+; GCN-O3-NEXT:function(amdgpu-image-intrinsic-opt
+; GCN-O3-NEXT:amdgpu-uniform-intrinsic-combine)
+; GCN-O3-NEXT:expand-variadics
+; GCN-O3-NEXT:amdgpu-always-inline
+; GCN-O3-NEXT:always-inline
+; GCN-O3-NEXT:amdgpu-export-kernel-runtime-handles
+; GCN-O3-NEXT:amdgpu-lower-exec-sync
+; GCN-O3-NEXT:amdgpu-sw-lower-lds
+; GCN-O3-NEXT:amdgpu-lower-module-lds
+; GCN-O3-NEXT:function(amdgpu-atomic-optimizer
+; GCN-O3-NEXT:atomic-expand
+; GCN-O3-NEXT:amdgpu-promote-alloca
+; GCN-O3-NEXT:separate-const-offset-from-gep<>
+; GCN-O3-NEXT:slsr
+; GCN-O3-NEXT:gvn<>
+; GCN-O3-NEXT:nary-reassociate
+; GCN-O3-NEXT:early-cse<>
+; GCN-O3-NEXT:amdgpu-codegenprepare
+; GCN-O3-NEXT:loop-mssa(licm<allowspeculation>)
+; GCN-O3-NEXT:verify
+; GCN-O3-NEXT:loop-mssa(canon-freeze
+; GCN-O3-NEXT:loop-reduce)
+; GCN-O3-NEXT:mergeicmps
+; GCN-O3-NEXT:expand-memcmp
+; GCN-O3-NEXT:unreachableblockelim
+; GCN-O3-NEXT:consthoist
+; GCN-O3-NEXT:replace-with-veclib
+; GCN-O3-NEXT:partially-inline-libcalls
+; GCN-O3-NEXT:ee-instrument<post-inline>
+; GCN-O3-NEXT:scalarize-masked-mem-intrin
+; GCN-O3-NEXT:expand-reductions
+; GCN-O3-NEXT:gvn<>)
+; GCN-O3-NEXT:amdgpu-preload-kernel-arguments
+; GCN-O3-NEXT:function(amdgpu-lower-kernel-arguments
+; GCN-O3-NEXT:codegenprepare
+; GCN-O3-NEXT:load-store-vectorizer)
+; GCN-O3-NEXT:amdgpu-lower-buffer-fat-pointers
+; GCN-O3-NEXT:amdgpu-lower-intrinsics
+; GCN-O3-NEXT:cgscc(function(lower-switch
+; GCN-O3-NEXT:lower-invoke
+; GCN-O3-NEXT:unreachableblockelim))
+; GCN-O3-NEXT:require<amdgpu-argument-usage>
+; GCN-O3-NEXT:cgscc(function(flatten-cfg
+; GCN-O3-NEXT:sink
+; GCN-O3-NEXT:amdgpu-late-codegenprepare
+; GCN-O3-NEXT:amdgpu-unify-divergent-exit-nodes
+; GCN-O3-NEXT:fix-irreducible
+; GCN-O3-NEXT:unify-loop-exits
+; GCN-O3-NEXT:StructurizeCFGPass
+; GCN-O3-NEXT:amdgpu-annotate-uniform
+; GCN-O3-NEXT:si-annotate-control-flow
+; GCN-O3-NEXT:amdgpu-rewrite-undef-for-phi
+; GCN-O3-NEXT:lcssa))
+; GCN-O3-NEXT:amdgpu-perf-hint
+; GCN-O3-NEXT:cgscc(function(require<uniformity>
+; GCN-O3-NEXT:objc-arc-contract
+; GCN-O3-NEXT:callbr-prepare
+; GCN-O3-NEXT:safe-stack
+; GCN-O3-NEXT:stack-protector
+; GCN-O3-NEXT:verify))
+; GCN-O3-NEXT:cgscc(function(machine-function(amdgpu-isel
+; GCN-O3-NEXT:si-fix-sgpr-copies
+; GCN-O3-NEXT:si-i1-copies
+; GCN-O3-NEXT:finalize-isel
+; GCN-O3-NEXT:early-tailduplication
+; GCN-O3-NEXT:opt-phis
+; GCN-O3-NEXT:stack-coloring
+; GCN-O3-NEXT:localstackalloc
+; GCN-O3-NEXT:dead-mi-elimination
+; GCN-O3-NEXT:early-machinelicm
+; GCN-O3-NEXT:machine-cse
+; GCN-O3-NEXT:machine-sink
+; GCN-O3-NEXT:peephole-opt
+; GCN-O3-NEXT:dead-mi-elimination
+; GCN-O3-NEXT:si-fold-operands
+; GCN-O3-NEXT:gcn-dpp-combine
+; GCN-O3-NEXT:si-load-store-opt
+; GCN-O3-NEXT:si-peephole-sdwa
+; GCN-O3-NEXT:early-machinelicm
+; GCN-O3-NEXT:machine-cse
+; GCN-O3-NEXT:si-fold-operands
+; GCN-O3-NEXT:dead-mi-elimination
+; GCN-O3-NEXT:si-shrink-instructions)))
+; GCN-O3-NEXT:require<reg-usage>
+; GCN-O3-NEXT:cgscc(function(machine-function(reg-usage-propagation
+; GCN-O3-NEXT:amdgpu-prepare-agpr-alloc
+; GCN-O3-NEXT:detect-dead-lanes
+; GCN-O3-NEXT:dead-mi-elimination
+; GCN-O3-NEXT:init-undef
+; GCN-O3-NEXT:process-imp-defs
+; GCN-O3-NEXT:unreachable-mbb-elimination
+; GCN-O3-NEXT:require<live-vars>
+; GCN-O3-NEXT:si-opt-vgpr-liverange
+; GCN-O3-NEXT:require<machine-loops>
+; GCN-O3-NEXT:phi-node-elimination
+; GCN-O3-NEXT:si-lower-control-flow
+; GCN-O3-NEXT:two-address-instruction
+; GCN-O3-NEXT:register-coalescer
+; GCN-O3-NEXT:rename-independent-subregs
+; GCN-O3-NEXT:amdgpu-rewrite-partial-reg-uses
+; GCN-O3-NEXT:machine-scheduler
+; GCN-O3-NEXT:amdgpu-pre-ra-optimizations
+; GCN-O3-NEXT:si-wqm
+; GCN-O3-NEXT:si-optimize-exec-masking-pre-ra
+; GCN-O3-NEXT:si-form-memory-clauses
+; GCN-O3-NEXT:amdgpu-pre-ra-long-branch-reg
+; GCN-O3-NEXT:greedy<sgpr>
+; GCN-O3-NEXT:virt-reg-rewriter<no-clear-vregs>
+; GCN-O3-NEXT:stack-slot-coloring
+; GCN-O3-NEXT:si-lower-sgpr-spills
+; GCN-O3-NEXT:si-pre-allocate-wwm-regs
+; GCN-O3-NEXT:greedy<wwm>
+; GCN-O3-NEXT:si-lower-wwm-copies
+; GCN-O3-NEXT:virt-reg-rewriter<no-clear-vregs>
+; GCN-O3-NEXT:amdgpu-reserve-wwm-regs
+; GCN-O3-NEXT:greedy<vgpr>
+; GCN-O3-NEXT:amdgpu-nsa-reassign
+; GCN-O3-NEXT:amdgpu-rewrite-agpr-copy-mfma
+; GCN-O3-NEXT:virt-reg-rewriter
+; GCN-O3-NEXT:amdgpu-mark-last-scratch-load
+; GCN-O3-NEXT:stack-slot-coloring
+; GCN-O3-NEXT:machine-cp
+; GCN-O3-NEXT:machinelicm
+; GCN-O3-NEXT:si-fix-vgpr-copies
+; GCN-O3-NEXT:si-optimize-exec-masking
+; GCN-O3-NEXT:remove-redundant-debug-values
+; GCN-O3-NEXT:fixup-statepoint-caller-saved
+; GCN-O3-NEXT:postra-machine-sink
+; GCN-O3-NEXT:shrink-wrap
+; GCN-O3-NEXT:prolog-epilog
+; GCN-O3-NEXT:machine-latecleanup
+; GCN-O3-NEXT:branch-folder
+; GCN-O3-NEXT:tailduplication
+; GCN-O3-NEXT:machine-cp
+; GCN-O3-NEXT:post-ra-pseudos
+; GCN-O3-NEXT:si-shrink-instructions
+; GCN-O3-NEXT:si-post-ra-bundler
+; GCN-O3-NEXT:postmisched
+; GCN-O3-NEXT:block-placement
+; GCN-O3-NEXT:fentry-insert
+; GCN-O3-NEXT:xray-instrumentation
+; GCN-O3-NEXT:gcn-create-vopd
+; GCN-O3-NEXT:si-memory-legalizer
+; GCN-O3-NEXT:si-insert-waitcnts
+; GCN-O3-NEXT:si-mode-register
+; GCN-O3-NEXT:si-insert-hard-clauses
+; GCN-O3-NEXT:si-late-branch-lowering
+; GCN-O3-NEXT:si-pre-emit-peephole
+; GCN-O3-NEXT:post-RA-hazard-rec
+; GCN-O3-NEXT:amdgpu-wait-sgpr-hazards
+; GCN-O3-NEXT:amdgpu-lower-vgpr-encoding
+; GCN-O3-NEXT:amdgpu-insert-delay-alu
+; GCN-O3-NEXT:branch-relaxation)))
+; GCN-O3-NEXT:require<reg-usage>
+; GCN-O3-NEXT:cgscc(function(machine-function(reg-usage-collector
+; GCN-O3-NEXT:remove-loads-into-fake-uses
+; GCN-O3-NEXT:live-debug-values
+; GCN-O3-NEXT:machine-sanmd
+; GCN-O3-NEXT:amdgpu-preload-kern-arg-prolog
+; GCN-O3-NEXT:stack-frame-layout
+; GCN-O3-NEXT:verify)
+; GCN-O3-NEXT:free-machine-function))
 
 define void @empty() {
   ret void

_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to