Matthew Poremba has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/58470 )

Change subject: dev-amdgpu,arch-vega: Implement TLB invalidation logic
......................................................................

dev-amdgpu,arch-vega: Implement TLB invalidation logic

Add logic to collect pointers to all GPU TLBs in full system. Implement
the invalidate-TLBs PM4 packet. The invalidation is done functionally
since there is really no benefit to simulating it with timing and there
is no support in the TLB to do so. This allows applications with much
larger data sets, which may reuse device memory pages, to work in gem5
without possibly crashing due to a stale translation being left over in
the TLB.

Change-Id: Ia30cce02154d482d8f75b2280409abb8f8375c24
---
M src/arch/amdgpu/vega/tlb.cc
M src/dev/amdgpu/amdgpu_vm.cc
M src/dev/amdgpu/amdgpu_vm.hh
M src/dev/amdgpu/pm4_defines.hh
M src/dev/amdgpu/pm4_packet_processor.cc
5 files changed, 64 insertions(+), 2 deletions(-)



diff --git a/src/arch/amdgpu/vega/tlb.cc b/src/arch/amdgpu/vega/tlb.cc
index 4e6c7b5..2256fd7 100644
--- a/src/arch/amdgpu/vega/tlb.cc
+++ b/src/arch/amdgpu/vega/tlb.cc
@@ -98,6 +98,11 @@

     // assuming one walker per TLB, set our walker's TLB to this TLB.
     walker->setTLB(this);
+
+    // gpuDevice should be non-null in full system only and is set by configs.
+    if (gpuDevice) {
+        gpuDevice->getVM().registerTLB(this);
+    }
 }

 GpuTLB::~GpuTLB()
diff --git a/src/dev/amdgpu/amdgpu_vm.cc b/src/dev/amdgpu/amdgpu_vm.cc
index 596558a..c0c9209 100644
--- a/src/dev/amdgpu/amdgpu_vm.cc
+++ b/src/dev/amdgpu/amdgpu_vm.cc
@@ -32,6 +32,7 @@
 #include "dev/amdgpu/amdgpu_vm.hh"

 #include "arch/amdgpu/vega/pagetable_walker.hh"
+#include "arch/amdgpu/vega/tlb.hh"
 #include "arch/generic/mmu.hh"
 #include "base/trace.hh"
 #include "debug/AMDGPUDevice.hh"
@@ -163,6 +164,23 @@
 }

 void
+AMDGPUVM::registerTLB(VegaISA::GpuTLB *tlb)
+{
+    DPRINTF(AMDGPUDevice, "Registered a TLB with device\n");
+    gpu_tlbs.push_back(tlb);
+}
+
+void
+AMDGPUVM::invalidateTLBs()
+{
+    DPRINTF(AMDGPUDevice, "Invalidating all TLBs\n");
+    for (auto &tlb : gpu_tlbs) {
+        tlb->invalidateAll();
+        DPRINTF(AMDGPUDevice, " ... TLB invalidated\n");
+    }
+}
+
+void
 AMDGPUVM::serialize(CheckpointOut &cp) const
 {
     Addr vm0PTBase = vmContext0.ptBase;
diff --git a/src/dev/amdgpu/amdgpu_vm.hh b/src/dev/amdgpu/amdgpu_vm.hh
index 72745f0..e16975b 100644
--- a/src/dev/amdgpu/amdgpu_vm.hh
+++ b/src/dev/amdgpu/amdgpu_vm.hh
@@ -143,6 +143,12 @@
     uint64_t mmhubBase = 0x0;
     uint64_t mmhubTop = 0x0;

+    /**
+     * List of TLBs associated with the GPU device. This is used for flushing
+     * the TLBs upon a driver request.
+     */
+    std::vector<VegaISA::GpuTLB *> gpu_tlbs;
+
   public:
     AMDGPUVM();

@@ -276,6 +282,13 @@
         return vmContexts[vmid].ptStart;
     }

+    /**
+     * Control methods for TLBs associated with the GPU device.
+     */
+    void registerTLB(VegaISA::GpuTLB *tlb);
+    void invalidateTLBs();
+
+
     void serialize(CheckpointOut &cp) const override;
     void unserialize(CheckpointIn &cp) override;

diff --git a/src/dev/amdgpu/pm4_defines.hh b/src/dev/amdgpu/pm4_defines.hh
index b7e9952..b690e54 100644
--- a/src/dev/amdgpu/pm4_defines.hh
+++ b/src/dev/amdgpu/pm4_defines.hh
@@ -43,9 +43,11 @@
 {

 /**
- * PM4 opcodes. Taken from linux tree at
- * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/rocm-4.3.x/
+ * PM4 opcodes. Taken from linux tree from the following locations:
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/
  *     drivers/gpu/drm/amd/amdkfd/kfd_pm4_opcodes.h
+ * https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/roc-4.3.x/
+ *     drivers/gpu/drm/amd/amdgpu/soc15d.h
  */
 enum it_opcode_type
 {
@@ -56,6 +58,7 @@
     IT_RELEASE_MEM                       = 0x49,
     IT_SET_UCONFIG_REG                   = 0x79,
     IT_SWITCH_BUFFER                     = 0x8B,
+    IT_INVALIDATE_TLBS                   = 0x98,
     IT_MAP_PROCESS                       = 0xA1,
     IT_MAP_QUEUES                        = 0xA2,
     IT_UNMAP_QUEUES                      = 0xA3,
diff --git a/src/dev/amdgpu/pm4_packet_processor.cc b/src/dev/amdgpu/pm4_packet_processor.cc
index cfd031b..0ee2034 100644
--- a/src/dev/amdgpu/pm4_packet_processor.cc
+++ b/src/dev/amdgpu/pm4_packet_processor.cc
@@ -305,6 +305,13 @@
                     dmaBuffer);
         } break;

+      case IT_INVALIDATE_TLBS: {
+        DPRINTF(PM4PacketProcessor, "Functionaly invalidating all TLBs\n");
+        gpuDevice->getVM().invalidateTLBs();
+        q->incRptr((header.count + 1) * sizeof(uint32_t));
+        decodeNext(q);
+        } break;
+
       default: {
         warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
         DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/58470
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: Ia30cce02154d482d8f75b2280409abb8f8375c24
Gerrit-Change-Number: 58470
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to