Matthew Poremba has uploaded this change for review:
https://gem5-review.googlesource.com/c/public/gem5/+/49847
Change subject: arch-gcn3,gpu-compute: Move GCN3 specific TLB to arch
......................................................................
arch-gcn3,gpu-compute: Move GCN3 specific TLB to arch
Move GpuTLB and TLBCoalescer to GCN3, as the TLB format is specific to
GCN3 and SE mode / APU simulation. Vega will have its own TLB,
coalescer, and walker suitable for a dGPU. This also adds a `using`
alias, GpuTranslationState, for the TLB translation state to reduce the
number of references to TheISA and X86ISA.
Change-Id: I34448bb4e5ddb9980b34a55bc717bbcea0e03db5
---
M src/arch/amdgpu/gcn3/SConscript
R src/arch/amdgpu/gcn3/X86GPUTLB.py
M src/arch/amdgpu/gcn3/gpu_isa.hh
R src/arch/amdgpu/gcn3/tlb.cc
R src/arch/amdgpu/gcn3/tlb.hh
R src/arch/amdgpu/gcn3/tlb_coalescer.cc
R src/arch/amdgpu/gcn3/tlb_coalescer.hh
M src/gpu-compute/SConscript
M src/gpu-compute/compute_unit.cc
M src/gpu-compute/fetch_unit.cc
M src/gpu-compute/shader.cc
M src/gpu-compute/shader.hh
12 files changed, 57 insertions(+), 53 deletions(-)
diff --git a/src/arch/amdgpu/gcn3/SConscript
b/src/arch/amdgpu/gcn3/SConscript
index 61c93c3..dc4660f 100644
--- a/src/arch/amdgpu/gcn3/SConscript
+++ b/src/arch/amdgpu/gcn3/SConscript
@@ -39,10 +39,15 @@
Return()
if env['TARGET_GPU_ISA'] == 'gcn3':
+ SimObject('X86GPUTLB.py')
+
Source('decoder.cc')
Source('insts/gpu_static_inst.cc')
Source('insts/instructions.cc')
Source('insts/op_encodings.cc')
Source('isa.cc')
Source('registers.cc')
+ Source('tlb.cc')
+ Source('tlb_coalescer.cc')
+
DebugFlag('GCN3', 'Debug flag for GCN3 GPU ISA')
diff --git a/src/gpu-compute/X86GPUTLB.py
b/src/arch/amdgpu/gcn3/X86GPUTLB.py
similarity index 96%
rename from src/gpu-compute/X86GPUTLB.py
rename to src/arch/amdgpu/gcn3/X86GPUTLB.py
index ab14bf8..1c7f1d0 100644
--- a/src/gpu-compute/X86GPUTLB.py
+++ b/src/arch/amdgpu/gcn3/X86GPUTLB.py
@@ -39,7 +39,7 @@
class X86GPUTLB(ClockedObject):
type = 'X86GPUTLB'
cxx_class = 'gem5::X86ISA::GpuTLB'
- cxx_header = 'gpu-compute/gpu_tlb.hh'
+ cxx_header = 'arch/amdgpu/gcn3/tlb.hh'
size = Param.Int(64, "TLB size (number of entries)")
assoc = Param.Int(64, "TLB associativity")
@@ -63,7 +63,8 @@
class TLBCoalescer(ClockedObject):
type = 'TLBCoalescer'
cxx_class = 'gem5::TLBCoalescer'
- cxx_header = 'gpu-compute/tlb_coalescer.hh'
+ cxx_header = 'arch/amdgpu/gcn3/tlb_coalescer.hh'
+
probesPerCycle = Param.Int(2, "Number of TLB probes per cycle")
coalescingWindow = Param.Int(1, "Permit coalescing across that many
ticks")
cpu_side_ports = VectorResponsePort("Port on side closer to CPU/CU")
diff --git a/src/arch/amdgpu/gcn3/gpu_isa.hh
b/src/arch/amdgpu/gcn3/gpu_isa.hh
index 65136bb..205f097 100644
--- a/src/arch/amdgpu/gcn3/gpu_isa.hh
+++ b/src/arch/amdgpu/gcn3/gpu_isa.hh
@@ -38,6 +38,7 @@
#include <type_traits>
#include "arch/amdgpu/gcn3/gpu_registers.hh"
+#include "arch/amdgpu/gcn3/tlb.hh"
#include "gpu-compute/dispatcher.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "gpu-compute/misc.hh"
diff --git a/src/gpu-compute/gpu_tlb.cc b/src/arch/amdgpu/gcn3/tlb.cc
similarity index 98%
rename from src/gpu-compute/gpu_tlb.cc
rename to src/arch/amdgpu/gcn3/tlb.cc
index 9560b4d..6e73148 100644
--- a/src/gpu-compute/gpu_tlb.cc
+++ b/src/arch/amdgpu/gcn3/tlb.cc
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2015 Advanced Micro Devices, Inc.
+ * Copyright (c) 2011-2021 Advanced Micro Devices, Inc.
* All rights reserved.
*
* For use for simulation and test purposes only
@@ -14,9 +14,9 @@
* this list of conditions and the following disclaimer in the
documentation
* and/or other materials provided with the distribution.
*
- * 3. Neither the name of the copyright holder nor the names of its
contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
+ * 3. Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
this
+ * software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE
@@ -30,10 +30,9 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE
* POSSIBILITY OF SUCH DAMAGE.
*
- * Author: Lisa Hsu
*/
-#include "gpu-compute/gpu_tlb.hh"
+#include "arch/amdgpu/gcn3/tlb.hh"
#include <cmath>
#include <cstring>
diff --git a/src/gpu-compute/gpu_tlb.hh b/src/arch/amdgpu/gcn3/tlb.hh
similarity index 98%
rename from src/gpu-compute/gpu_tlb.hh
rename to src/arch/amdgpu/gcn3/tlb.hh
index 4652a73..944c0ac 100644
--- a/src/gpu-compute/gpu_tlb.hh
+++ b/src/arch/amdgpu/gcn3/tlb.hh
@@ -438,6 +438,8 @@
};
}
+using GpuTranslationState = X86ISA::GpuTLB::TranslationState;
+
} // namespace gem5
#endif // __GPU_TLB_HH__
diff --git a/src/gpu-compute/tlb_coalescer.cc
b/src/arch/amdgpu/gcn3/tlb_coalescer.cc
similarity index 94%
rename from src/gpu-compute/tlb_coalescer.cc
rename to src/arch/amdgpu/gcn3/tlb_coalescer.cc
index d82fa7e..9b53db8 100644
--- a/src/gpu-compute/tlb_coalescer.cc
+++ b/src/arch/amdgpu/gcn3/tlb_coalescer.cc
@@ -31,7 +31,7 @@
* POSSIBILITY OF SUCH DAMAGE.
*/
-#include "gpu-compute/tlb_coalescer.hh"
+#include "arch/amdgpu/gcn3/tlb_coalescer.hh"
#include <cstring>
@@ -101,11 +101,11 @@
if (disableCoalescing)
return false;
- TheISA::GpuTLB::TranslationState *incoming_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(incoming_pkt->senderState);
+ GpuTranslationState *incoming_state =
+ safe_cast<GpuTranslationState*>(incoming_pkt->senderState);
- TheISA::GpuTLB::TranslationState *coalesced_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(coalesced_pkt->senderState);
+ GpuTranslationState *coalesced_state =
+ safe_cast<GpuTranslationState*>(coalesced_pkt->senderState);
// Rule 1: Coalesce requests only if they
// fall within the same virtual page
@@ -148,8 +148,8 @@
DPRINTF(GPUTLB, "Update phys. addr. for %d coalesced reqs for
page %#x\n",
issuedTranslationsTable[virt_page_addr].size(),
virt_page_addr);
- TheISA::GpuTLB::TranslationState *sender_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
TheISA::TlbEntry *tlb_entry = sender_state->tlbEntry;
assert(tlb_entry);
@@ -167,8 +167,8 @@
for (int i = 0; i < issuedTranslationsTable[virt_page_addr].size();
++i) {
PacketPtr local_pkt = issuedTranslationsTable[virt_page_addr][i];
- TheISA::GpuTLB::TranslationState *sender_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(
local_pkt->senderState);
// we are sending the packet back, so pop the reqCnt associated
@@ -238,8 +238,8 @@
// number of coalesced reqs for a given window
int coalescedReq_cnt = 0;
- TheISA::GpuTLB::TranslationState *sender_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
// push back the port to remember the path back
sender_state->ports.push_back(this);
@@ -337,8 +337,8 @@
TLBCoalescer::CpuSidePort::recvFunctional(PacketPtr pkt)
{
- TheISA::GpuTLB::TranslationState *sender_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
bool update_stats = !sender_state->isPrefetch;
@@ -460,8 +460,8 @@
rejected = true;
++vector_index;
} else {
- TheISA::GpuTLB::TranslationState *tmp_sender_state =
- safe_cast<TheISA::GpuTLB::TranslationState*>
+ GpuTranslationState *tmp_sender_state =
+ safe_cast<GpuTranslationState*>
(first_packet->senderState);
bool update_stats = !tmp_sender_state->isPrefetch;
diff --git a/src/gpu-compute/tlb_coalescer.hh
b/src/arch/amdgpu/gcn3/tlb_coalescer.hh
similarity index 99%
rename from src/gpu-compute/tlb_coalescer.hh
rename to src/arch/amdgpu/gcn3/tlb_coalescer.hh
index fce8740..afe12c9 100644
--- a/src/gpu-compute/tlb_coalescer.hh
+++ b/src/arch/amdgpu/gcn3/tlb_coalescer.hh
@@ -39,13 +39,13 @@
#include <string>
#include <vector>
+#include "arch/amdgpu/gcn3/tlb.hh"
#include "arch/generic/tlb.hh"
#include "arch/x86/isa.hh"
#include "arch/x86/pagetable.hh"
#include "arch/x86/regs/segment.hh"
#include "base/logging.hh"
#include "base/statistics.hh"
-#include "gpu-compute/gpu_tlb.hh"
#include "mem/port.hh"
#include "mem/request.hh"
#include "params/TLBCoalescer.hh"
diff --git a/src/gpu-compute/SConscript b/src/gpu-compute/SConscript
index ae0bfab..2ccf1b7 100644
--- a/src/gpu-compute/SConscript
+++ b/src/gpu-compute/SConscript
@@ -39,7 +39,6 @@
SimObject('GPU.py')
SimObject('GPUStaticInstFlags.py')
SimObject('LdsState.py')
-SimObject('X86GPUTLB.py')
Source('comm.cc')
Source('compute_unit.cc')
@@ -54,7 +53,6 @@
Source('gpu_exec_context.cc')
Source('gpu_render_driver.cc')
Source('gpu_static_inst.cc')
-Source('gpu_tlb.cc')
Source('lds_state.cc')
Source('local_memory_pipeline.cc')
Source('pool_manager.cc')
@@ -69,7 +67,6 @@
Source('dyn_pool_manager.cc')
Source('simple_pool_manager.cc')
Source('static_register_manager_policy.cc')
-Source('tlb_coalescer.cc')
Source('vector_register_file.cc')
Source('wavefront.cc')
diff --git a/src/gpu-compute/compute_unit.cc
b/src/gpu-compute/compute_unit.cc
index e704aec..34280a4 100644
--- a/src/gpu-compute/compute_unit.cc
+++ b/src/gpu-compute/compute_unit.cc
@@ -1076,8 +1076,8 @@
pkt->senderState = new DTLBPort::SenderState(gpuDynInst, index);
// This is the senderState needed by the TLB hierarchy to function
- X86ISA::GpuTLB::TranslationState *translation_state =
- new X86ISA::GpuTLB::TranslationState(TLB_mode, shader->gpuTc,
false,
+ GpuTranslationState *translation_state =
+ new GpuTranslationState(TLB_mode, shader->gpuTc, false,
pkt->senderState);
pkt->senderState = translation_state;
@@ -1091,8 +1091,8 @@
stats.hitsPerTLBLevel[hit_level]++;
// New SenderState for the memory access
- X86ISA::GpuTLB::TranslationState *sender_state =
-
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
delete sender_state->tlbEntry;
delete sender_state->saved;
@@ -1169,7 +1169,7 @@
delete pkt->senderState;
// Because it's atomic operation, only need TLB translation state
- pkt->senderState = new X86ISA::GpuTLB::TranslationState(TLB_mode,
+ pkt->senderState = new GpuTranslationState(TLB_mode,
shader->gpuTc);
tlbPort[tlbPort_index].sendFunctional(pkt);
@@ -1190,8 +1190,8 @@
new_pkt->req->getPaddr());
// safe_cast the senderState
- X86ISA::GpuTLB::TranslationState *sender_state =
-
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
delete sender_state->tlbEntry;
delete new_pkt;
@@ -1211,7 +1211,7 @@
new ComputeUnit::ScalarDTLBPort::SenderState(gpuDynInst);
pkt->senderState =
- new X86ISA::GpuTLB::TranslationState(tlb_mode, shader->gpuTc,
false,
+ new GpuTranslationState(tlb_mode, shader->gpuTc, false,
pkt->senderState);
if (scalarDTLBPort.isStalled()) {
@@ -1397,8 +1397,8 @@
computeUnit->stats.tlbCycles += curTick();
// pop off the TLB translation state
- X86ISA::GpuTLB::TranslationState *translation_state =
-
safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *translation_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
// no PageFaults are permitted for data accesses
if (!translation_state->tlbEntry) {
@@ -1508,15 +1508,15 @@
// Because it's atomic operation, only need TLB translation
state
prefetch_pkt->senderState =
- new X86ISA::GpuTLB::TranslationState(TLB_mode,
+ new GpuTranslationState(TLB_mode,
computeUnit->shader->gpuTc, true);
// Currently prefetches are zero-latency, hence the
sendFunctional
sendFunctional(prefetch_pkt);
/* safe_cast the senderState */
- X86ISA::GpuTLB::TranslationState *tlb_state =
- safe_cast<X86ISA::GpuTLB::TranslationState*>(
+ GpuTranslationState *tlb_state =
+ safe_cast<GpuTranslationState*>(
prefetch_pkt->senderState);
@@ -1663,8 +1663,8 @@
{
assert(pkt->senderState);
- X86ISA::GpuTLB::TranslationState *translation_state =
- safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *translation_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
// Page faults are not allowed
fatal_if(!translation_state->tlbEntry,
@@ -1728,8 +1728,8 @@
assert(pkt->senderState);
// pop off the TLB translation state
- X86ISA::GpuTLB::TranslationState *translation_state
- = safe_cast<X86ISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *translation_state
+ = safe_cast<GpuTranslationState*>(pkt->senderState);
bool success = translation_state->tlbEntry != nullptr;
delete translation_state->tlbEntry;
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc
index 16ea7cc..437a48d 100644
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -174,7 +174,7 @@
// Sender State needed by TLB hierarchy
pkt->senderState =
- new TheISA::GpuTLB::TranslationState(BaseMMU::Execute,
+ new GpuTranslationState(BaseMMU::Execute,
computeUnit.shader->gpuTc,
false, pkt->senderState);
@@ -201,13 +201,13 @@
}
} else {
pkt->senderState =
- new TheISA::GpuTLB::TranslationState(BaseMMU::Execute,
+ new GpuTranslationState(BaseMMU::Execute,
computeUnit.shader->gpuTc);
computeUnit.sqcTLBPort.sendFunctional(pkt);
- TheISA::GpuTLB::TranslationState *sender_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
delete sender_state->tlbEntry;
delete sender_state;
diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc
index df0295f..cb1df66 100644
--- a/src/gpu-compute/shader.cc
+++ b/src/gpu-compute/shader.cc
@@ -35,7 +35,6 @@
#include <limits>
-#include "arch/x86/linux/linux.hh"
#include "arch/x86/page_size.hh"
#include "base/chunk_generator.hh"
#include "debug/GPUAgentDisp.hh"
@@ -430,7 +429,7 @@
{
// update senderState. Need to know the gpuTc and the TLB mode
pkt->senderState =
- new TheISA::GpuTLB::TranslationState(mode, gpuTc, false);
+ new GpuTranslationState(mode, gpuTc, false);
// even when the perLaneTLB flag is turned on
// it's ok tp send all accesses through lane 0
@@ -439,8 +438,8 @@
cuList[cu_id]->tlbPort[0].sendFunctional(pkt);
/* safe_cast the senderState */
- TheISA::GpuTLB::TranslationState *sender_state =
-
safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->senderState);
+ GpuTranslationState *sender_state =
+ safe_cast<GpuTranslationState*>(pkt->senderState);
delete sender_state->tlbEntry;
delete pkt->senderState;
diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh
index 5a891c3..6108bdf 100644
--- a/src/gpu-compute/shader.hh
+++ b/src/gpu-compute/shader.hh
@@ -37,6 +37,7 @@
#include <functional>
#include <string>
+#include "arch/gpu_isa.hh"
#include "base/statistics.hh"
#include "base/stats/group.hh"
#include "base/types.hh"
@@ -47,7 +48,6 @@
#include "cpu/thread_state.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
-#include "gpu-compute/gpu_tlb.hh"
#include "gpu-compute/hsa_queue_entry.hh"
#include "gpu-compute/lds_state.hh"
#include "mem/page_table.hh"
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/49847
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I34448bb4e5ddb9980b34a55bc717bbcea0e03db5
Gerrit-Change-Number: 49847
Gerrit-PatchSet: 1
Gerrit-Owner: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s