Matthew Poremba has submitted this change. (
https://gem5-review.googlesource.com/c/public/gem5/+/42220 )
Change subject: gpu-compute, dev-hsa: Fix doorbell for gfx900
......................................................................
gpu-compute, dev-hsa: Fix doorbell for gfx900
gfx9 changed the size of the doorbell, and what the write index
is when the doorbell is rang. --gfx-version flag is used to set
the doorbell size
Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42220
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/dev/hsa/hw_scheduler.cc
M src/dev/hsa/hw_scheduler.hh
M src/gpu-compute/gpu_compute_driver.cc
M src/gpu-compute/gpu_compute_driver.hh
6 files changed, 47 insertions(+), 31 deletions(-)
Approvals:
Matt Sinclair: Looks good to me, approved; Looks good to me, approved
kokoro: Regressions pass
diff --git a/src/dev/hsa/hsa_packet_processor.cc
b/src/dev/hsa/hsa_packet_processor.cc
index 9721d0e..cd6ef5a 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -91,22 +91,22 @@
}
void
-HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
+HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id, int
doorbellSize)
{
- hwSchdlr->unregisterQueue(queue_id);
+ hwSchdlr->unregisterQueue(queue_id, doorbellSize);
}
void
HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
- uint32_t size)
+ uint32_t size, int doorbellSize)
{
DPRINTF(HSAPacketProcessor,
"%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
(void *)basePointer, queue_id, size);
hwSchdlr->registerNewQueue(hostReadIndexPointer,
- basePointer, queue_id, size);
+ basePointer, queue_id, size, doorbellSize);
}
AddrRangeList
@@ -133,7 +133,16 @@
"%s: write of size %d to reg-offset %d (0x%x)\n",
__FUNCTION__, pkt->getSize(), daddr, daddr);
- uint32_t doorbell_reg = pkt->getLE<uint32_t>();
+ int doorbellSize = gpu_device->driver()->doorbellSize();
+ assert(doorbellSize == pkt->getSize());
+
+ uint64_t doorbell_reg(0);
+ if (pkt->getSize() == 8)
+ doorbell_reg = pkt->getLE<uint64_t>() + 1;
+ else if (pkt->getSize() == 4)
+ doorbell_reg = pkt->getLE<uint32_t>();
+ else
+ fatal("invalid db size");
DPRINTF(HSAPacketProcessor,
"%s: write data 0x%x to offset %d (0x%x)\n",
diff --git a/src/dev/hsa/hsa_packet_processor.hh
b/src/dev/hsa/hsa_packet_processor.hh
index 7e8f6a5..fe71612 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -331,8 +331,8 @@
void setDeviceQueueDesc(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
- uint32_t size);
- void unsetDeviceQueueDesc(uint64_t queue_id);
+ uint32_t size, int doorbellSize);
+ void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
void setDevice(GPUCommandProcessor * dev);
void updateReadIndex(int, uint32_t);
void getCommandsFromHost(int pid, uint32_t rl_idx);
diff --git a/src/dev/hsa/hw_scheduler.cc b/src/dev/hsa/hw_scheduler.cc
index b52e592..00dff99 100644
--- a/src/dev/hsa/hw_scheduler.cc
+++ b/src/dev/hsa/hw_scheduler.cc
@@ -84,18 +84,13 @@
HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
- uint32_t size)
+ uint32_t size, int doorbellSize)
{
assert(queue_id < MAX_ACTIVE_QUEUES);
// Map queue ID to doorbell.
// We are only using offset to pio base address as doorbell
// We use the same mapping function used by hsa runtime to do this
mapping
- //
- // Originally
- // #define VOID_PTR_ADD32(ptr,n)
- // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
- // (Addr)VOID_PTR_ADD32(0, queue_id)
- Addr db_offset = sizeof(uint32_t)*queue_id;
+ Addr db_offset = queue_id * doorbellSize;
if (dbMap.find(db_offset) != dbMap.end()) {
panic("Creating an already existing queue (queueID %d)", queue_id);
}
@@ -318,7 +313,7 @@
}
void
-HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
+HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
{
auto dbmap_iter = dbMap.find(db_addr);
if (dbmap_iter == dbMap.end()) {
@@ -335,17 +330,9 @@
}
void
-HWScheduler::unregisterQueue(uint64_t queue_id)
+HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
{
- // Pointer arithmetic on a null pointer is undefined behavior. Clang
- // compilers therefore complain if the following reads:
- // `(Addr)(VOID_PRT_ADD32(0, queue_id))`
- //
- // Originally
- // #define VOID_PTR_ADD32(ptr,n)
- // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
- // (Addr)VOID_PTR_ADD32(0, queue_id)
- Addr db_offset = sizeof(uint32_t)*queue_id;
+ Addr db_offset = queue_id * doorbellSize;
auto dbmap_iter = dbMap.find(db_offset);
if (dbmap_iter == dbMap.end()) {
panic("Destroying a non-existing queue (db_offset %x)",
diff --git a/src/dev/hsa/hw_scheduler.hh b/src/dev/hsa/hw_scheduler.hh
index f80e893..c4fe66c 100644
--- a/src/dev/hsa/hw_scheduler.hh
+++ b/src/dev/hsa/hw_scheduler.hh
@@ -52,12 +52,12 @@
: hsaPP(hsa_pp), nextALId(0), nextRLId(0),
wakeupDelay(wakeup_delay), schedWakeupEvent(this)
{}
- void write(Addr db_addr, uint32_t doorbell_reg);
+ void write(Addr db_addr, uint64_t doorbell_reg);
void registerNewQueue(uint64_t hostReadIndexPointer,
uint64_t basePointer,
uint64_t queue_id,
- uint32_t size);
- void unregisterQueue(uint64_t queue_id);
+ uint32_t size, int doorbellSize);
+ void unregisterQueue(uint64_t queue_id, int doorbellSize);
void wakeup();
void schedWakeup();
class SchedulerWakeupEvent : public Event
diff --git a/src/gpu-compute/gpu_compute_driver.cc
b/src/gpu-compute/gpu_compute_driver.cc
index 2552b76..8c79be3 100644
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -149,18 +149,22 @@
TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf);
args.copyIn(mem_proxy);
- if ((sizeof(uint32_t) * queueId) > 4096) {
+ if ((doorbellSize() * queueId) > 4096) {
fatal("%s: Exceeded maximum number of HSA queues allowed\n",
name());
}
args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;
+ // for vega offset needs to include exact value of doorbell
+ if (doorbellSize())
+ args->doorbell_offset += queueId * doorbellSize();
+
args->queue_id = queueId++;
auto &hsa_pp = device->hsaPacketProc();
hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
args->ring_base_address, args->queue_id,
- args->ring_size);
+ args->ring_size, doorbellSize());
args.copyOut(mem_proxy);
}
@@ -243,7 +247,8 @@
args.copyIn(virt_proxy);
DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
"queue offset %d\n", args->queue_id);
- device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
+ device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
+ doorbellSize());
}
break;
case AMDKFD_IOC_SET_MEMORY_POLICY:
diff --git a/src/gpu-compute/gpu_compute_driver.hh
b/src/gpu-compute/gpu_compute_driver.hh
index ebaab27..922a699 100644
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -81,6 +81,21 @@
*/
void setMtype(RequestPtr req);
+ int
+ doorbellSize()
+ {
+ switch (gfxVersion) {
+ case GfxVersion::gfx801:
+ case GfxVersion::gfx803:
+ return 4;
+ case GfxVersion::gfx900:
+ return 8;
+ default:
+ fatal("Invalid GPU type\n");
+ }
+ return 4;
+ }
+
class DriverWakeupEvent : public Event
{
public:
6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the
submitted one.
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42220
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3
Gerrit-Change-Number: 42220
Gerrit-PatchSet: 22
Gerrit-Owner: Alex Dutu <alexandru.d...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-CC: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s