[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-26 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

https://github.com/llvm/llvm-project/pull/145886

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-26 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic closed 
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-25 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From fada12c02954dd1c244c944fa37dbae674284923 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 124 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 127 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b38dacfe9958d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,111 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
+  return {nullptr, -1};
+
+Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+  return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Reg

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-25 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From fada12c02954dd1c244c944fa37dbae674284923 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 124 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 127 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b38dacfe9958d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,111 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() != AMDGPU::G_AMDGPU_READANYLANE)
+  return {nullptr, -1};
+
+Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+  return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Reg

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-20 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {

Pierre-vh wrote:

use early return here?

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-20 Thread Pierre van Houtryve via llvm-branch-commits

https://github.com/Pierre-vh approved this pull request.


https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-19 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From 28f0f171b27aaf707706db71978d525c12e21491 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 122 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 125 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Register Dst = MI.get

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-19 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From 28f0f171b27aaf707706db71978d525c12e21491 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 122 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 125 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Register Dst = MI.get

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Petar Avramovic via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}

petar-avramovic wrote:

Not sure, did not see any cases yet

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Petar Avramovic via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);

petar-avramovic wrote:

No, have to set it manually before using the builder, it was a bug.

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From 64d7853a9edefabe8de40748e01348d2d5c017c5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 122 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 125 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair<GUnmerge *, int> tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  if (auto *UnMerge = getOpcodeDef<GUnmerge>(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef<GMergeLikeInstr>(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef<GUnmerge>(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef<GMergeLikeInstr>(UnMerge->getSourceReg(), MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Register Dst = MI.get

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic updated 
https://github.com/llvm/llvm-project/pull/142789

>From 64d7853a9edefabe8de40748e01348d2d5c017c5 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Thu, 5 Jun 2025 12:17:13 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 122 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++
 3 files changed, 125 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..6707b641b0d25 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,109 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::pair tryMatchRALFromUnmerge(Register Src) {
+MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  if (auto *UnMerge = getOpcodeDef(RALSrc, MRI))
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  void replaceRegWithOrBuildCopy(Register Dst, Register Src) {
+if (Dst.isVirtual())
+  MRI.replaceRegWith(Dst, Src);
+else
+  B.buildCopy(Dst, Src);
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST)
+  RALDst = SrcMI.getOperand(1).getReg();
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+B.setInstr(Copy);
+if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+  // Src = READANYLANE RALSrc Src = READANYLANE RALSrc
+  // Dst = Copy Src   $Dst = Copy Src
+  // ->   ->
+  // Dst = RALSrc $Dst = Copy RALSrc
+  replaceRegWithOrBuildCopy(Dst, RALSrc);
+} else {
+  // RALDst = READANYLANE RALSrc  RALDst = READANYLANE RALSrc
+  // Src = G_BITCAST RALDst   Src = G_BITCAST RALDst
+  // Dst = Copy Src   Dst = Copy Src
+  // ->  ->
+  // NewVgpr = G_BITCAST RALDst   NewVgpr = G_BITCAST RALDst
+  // Dst = NewVgpr$Dst = Copy NewVgpr
+  auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+  replaceRegWithOrBuildCopy(Dst, Bitcast.getReg(0));
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineInstr &MI) {
+if (tryEliminateReadAnyLane(MI))
+  return;
+
 Register Dst = MI.get

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);

Pierre-vh wrote:

Does this work as intended without the `B.setInstr(Copy)` call?

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);

Pierre-vh wrote:

Just wondering, could we simply emit a COPY here and let another combine take 
care of the folding?
The two branches are very similar; it would be nice to make this more terse. 
Maybe we could use a helper like `copyOrReplace` for `Dst` that does the right 
thing depending on whether `Dst` is virtual or not?

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)

Pierre-vh wrote:

```suggestion
  if (auto *UnMerge = getOpcodeDef(RALSrc, MRI))
```

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}

Pierre-vh wrote:

```suggestion
if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) 
  RALDst = SrcMI.getOperand(1).getReg();
```

nit: can opcodes other than bitcast show up here in a way that matters, such 
as in-register extensions or assert-exts?
It feels like we should have a helper for this somewhere.

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {

Pierre-vh wrote:

```suggestion
  std::pair tryMatchRALFromUnmerge(Register Src) {
```

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-05 Thread Pierre van Houtryve via llvm-branch-commits


@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);

Pierre-vh wrote:

```suggestion
MachineInstr *ReadAnyLane = MRI.getVRegDef(Src);
```
I think we generally use `auto` only when the type is already spelled out on the RHS.

https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-04 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Petar Avramovic (petar-avramovic)


Changes



---

Patch is 22.08 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/142789.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+118-18) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll 
(+2-23) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir 
(+19-59) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+  }
+} else {
+  B.setInstr(Copy);
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+  }
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(MachineI

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-04 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-globalisel

Author: Petar Avramovic (petar-avramovic)


Changes



---

Patch is 22.08 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/142789.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp (+118-18) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.ll 
(+2-23) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/readanylane-combines.mir 
(+19-59) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+  }
+} else {
+  B.setInstr(Copy);
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+  }
+}
+
+eraseInstr(Copy, MRI, nullptr);
+return true;
+  }
+
   void tryCombineCopy(Machine

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-04 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic ready_for_review 
https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-04 Thread Petar Avramovic via llvm-branch-commits

petar-avramovic wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack 
> [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-downstack-mergeability-warning).
> [Learn more](https://graphite.dev/docs/merge-pull-requests)

* **#142790** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142790?utm_source=stack-comment-icon)
* **#142789** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/142789?utm_source=stack-comment-view-in-graphite)
* **#142788** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142788?utm_source=stack-comment-icon)
* `main`




This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn 
more about [stacking](https://stacking.dev/?utm_source=stack-comment).


https://github.com/llvm/llvm-project/pull/142789
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Improve readanylane combines in regbanklegalize (PR #142789)

2025-06-04 Thread Petar Avramovic via llvm-branch-commits

https://github.com/petar-avramovic created 
https://github.com/llvm/llvm-project/pull/142789

None

>From fcd0dc75f4674297ef1f5c591ecf6c16314ce3e2 Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Wed, 4 Jun 2025 17:12:16 +0200
Subject: [PATCH] AMDGPU/GlobalISel: Improve readanylane combines in
 regbanklegalize

---
 .../Target/AMDGPU/AMDGPURegBankLegalize.cpp   | 136 +++---
 .../AMDGPU/GlobalISel/readanylane-combines.ll |  25 +---
 .../GlobalISel/readanylane-combines.mir   |  78 +++---
 3 files changed, 139 insertions(+), 100 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index ba661348ca5b5..b5fe0ed499255 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -23,6 +23,7 @@
 #include "GCNSubtarget.h"
 #include "llvm/CodeGen/GlobalISel/CSEInfo.h"
 #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineUniformityAnalysis.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -137,7 +138,123 @@ class AMDGPURegBankLegalizeCombiner {
 return {MatchMI, MatchMI->getOperand(1).getReg()};
   }
 
+  std::tuple tryMatchRALFromUnmerge(Register Src) {
+auto *ReadAnyLane = MRI.getVRegDef(Src);
+if (ReadAnyLane->getOpcode() == AMDGPU::G_AMDGPU_READANYLANE) {
+  Register RALSrc = ReadAnyLane->getOperand(1).getReg();
+  auto *UnMerge = getOpcodeDef(RALSrc, MRI);
+  if (UnMerge)
+return {UnMerge, UnMerge->findRegisterDefOperandIdx(RALSrc, nullptr)};
+}
+return {nullptr, -1};
+  }
+
+  Register getReadAnyLaneSrc(Register Src) {
+// Src = G_AMDGPU_READANYLANE RALSrc
+auto [RAL, RALSrc] = tryMatch(Src, AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+
+// LoVgpr, HiVgpr = G_UNMERGE_VALUES UnmergeSrc
+// LoSgpr = G_AMDGPU_READANYLANE LoVgpr
+// HiSgpr = G_AMDGPU_READANYLANE HiVgpr
+// Src G_MERGE_VALUES LoSgpr, HiSgpr
+auto *Merge = getOpcodeDef(Src, MRI);
+if (Merge) {
+  unsigned NumElts = Merge->getNumSources();
+  auto [Unmerge, Idx] = tryMatchRALFromUnmerge(Merge->getSourceReg(0));
+  if (!Unmerge || Unmerge->getNumDefs() != NumElts || Idx != 0)
+return {};
+
+  // check if all elements are from same unmerge and there is no shuffling
+  for (unsigned i = 1; i < NumElts; ++i) {
+auto [UnmergeI, IdxI] = tryMatchRALFromUnmerge(Merge->getSourceReg(i));
+if (UnmergeI != Unmerge || (unsigned)IdxI != i)
+  return {};
+  }
+  return Unmerge->getSourceReg();
+}
+
+// ..., VgprI, ... = G_UNMERGE_VALUES VgprLarge
+// SgprI = G_AMDGPU_READANYLANE VgprI
+// SgprLarge G_MERGE_VALUES ..., SgprI, ...
+// ..., Src, ... = G_UNMERGE_VALUES SgprLarge
+auto *UnMerge = getOpcodeDef(Src, MRI);
+if (UnMerge) {
+  int Idx = UnMerge->findRegisterDefOperandIdx(Src, nullptr);
+  auto *Merge = getOpcodeDef(UnMerge->getSourceReg(), 
MRI);
+  if (Merge) {
+auto [RAL, RALSrc] =
+tryMatch(Merge->getSourceReg(Idx), AMDGPU::G_AMDGPU_READANYLANE);
+if (RAL)
+  return RALSrc;
+  }
+}
+
+return {};
+  }
+
+  bool tryEliminateReadAnyLane(MachineInstr &Copy) {
+Register Dst = Copy.getOperand(0).getReg();
+Register Src = Copy.getOperand(1).getReg();
+if (!Src.isVirtual())
+  return false;
+
+Register RALDst = Src;
+MachineInstr &SrcMI = *MRI.getVRegDef(Src);
+if (SrcMI.getOpcode() == AMDGPU::G_BITCAST) {
+  RALDst = SrcMI.getOperand(1).getReg();
+}
+
+Register RALSrc = getReadAnyLaneSrc(RALDst);
+if (!RALSrc)
+  return false;
+
+if (Dst.isVirtual()) {
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// Dst = Copy Src
+// ->
+// Dst = RALSrc
+MRI.replaceRegWith(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// Dst = NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+MRI.replaceRegWith(Dst, Bitcast.getReg(0));
+  }
+} else {
+  B.setInstr(Copy);
+  if (SrcMI.getOpcode() != AMDGPU::G_BITCAST) {
+// Src = READANYLANE RALSrc
+// $Dst = Copy Src
+// ->
+// $Dst = Copy RALSrc
+B.buildCopy(Dst, RALSrc);
+  } else {
+// RALDst = READANYLANE RALSrc
+// Src = G_BITCAST RALDst
+// Dst = Copy Src
+// ->
+// NewVgpr = G_BITCAST RALDst
+// $Dst = Copy NewVgpr
+auto Bitcast = B.buildBitcast({VgprRB, MRI.getType(Src)}, RALSrc);
+B.buildCopy(Dst, Bitcast.getReg(0));
+  }
+}
+
+eraseInstr(Copy, MRI, nullptr);