Matthew Poremba has submitted this change. ( https://gem5-review.googlesource.com/c/public/gem5/+/42220 )

Change subject: gpu-compute, dev-hsa: Fix doorbell for gfx900
......................................................................

gpu-compute, dev-hsa: Fix doorbell for gfx900

gfx9 changed the size of the doorbell, and what the write index
is when the doorbell is rang. --gfx-version flag is used to set
the doorbell size

Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3
Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/42220
Tested-by: kokoro <noreply+kok...@google.com>
Reviewed-by: Matt Sinclair <mattdsincl...@gmail.com>
Maintainer: Matt Sinclair <mattdsincl...@gmail.com>
---
M src/dev/hsa/hsa_packet_processor.cc
M src/dev/hsa/hsa_packet_processor.hh
M src/dev/hsa/hw_scheduler.cc
M src/dev/hsa/hw_scheduler.hh
M src/gpu-compute/gpu_compute_driver.cc
M src/gpu-compute/gpu_compute_driver.hh
6 files changed, 47 insertions(+), 31 deletions(-)

Approvals:
  Matt Sinclair: Looks good to me, approved; Looks good to me, approved
  kokoro: Regressions pass



diff --git a/src/dev/hsa/hsa_packet_processor.cc b/src/dev/hsa/hsa_packet_processor.cc
index 9721d0e..cd6ef5a 100644
--- a/src/dev/hsa/hsa_packet_processor.cc
+++ b/src/dev/hsa/hsa_packet_processor.cc
@@ -91,22 +91,22 @@
 }

 void
-HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
+HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
 {
-    hwSchdlr->unregisterQueue(queue_id);
+    hwSchdlr->unregisterQueue(queue_id, doorbellSize);
 }

 void
 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
                                        uint64_t basePointer,
                                        uint64_t queue_id,
-                                       uint32_t size)
+                                       uint32_t size, int doorbellSize)
 {
     DPRINTF(HSAPacketProcessor,
              "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
              (void *)basePointer, queue_id, size);
     hwSchdlr->registerNewQueue(hostReadIndexPointer,
-                               basePointer, queue_id, size);
+                               basePointer, queue_id, size, doorbellSize);
 }

 AddrRangeList
@@ -133,7 +133,16 @@
           "%s: write of size %d to reg-offset %d (0x%x)\n",
           __FUNCTION__, pkt->getSize(), daddr, daddr);

-    uint32_t doorbell_reg = pkt->getLE<uint32_t>();
+    int doorbellSize = gpu_device->driver()->doorbellSize();
+    assert(doorbellSize == pkt->getSize());
+
+    uint64_t doorbell_reg(0);
+    if (pkt->getSize() == 8)
+        doorbell_reg = pkt->getLE<uint64_t>() + 1;
+    else if (pkt->getSize() == 4)
+        doorbell_reg = pkt->getLE<uint32_t>();
+    else
+        fatal("invalid db size");

     DPRINTF(HSAPacketProcessor,
             "%s: write data 0x%x to offset %d (0x%x)\n",
diff --git a/src/dev/hsa/hsa_packet_processor.hh b/src/dev/hsa/hsa_packet_processor.hh
index 7e8f6a5..fe71612 100644
--- a/src/dev/hsa/hsa_packet_processor.hh
+++ b/src/dev/hsa/hsa_packet_processor.hh
@@ -331,8 +331,8 @@
     void setDeviceQueueDesc(uint64_t hostReadIndexPointer,
                             uint64_t basePointer,
                             uint64_t queue_id,
-                            uint32_t size);
-    void unsetDeviceQueueDesc(uint64_t queue_id);
+                            uint32_t size, int doorbellSize);
+    void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize);
     void setDevice(GPUCommandProcessor * dev);
     void updateReadIndex(int, uint32_t);
     void getCommandsFromHost(int pid, uint32_t rl_idx);
diff --git a/src/dev/hsa/hw_scheduler.cc b/src/dev/hsa/hw_scheduler.cc
index b52e592..00dff99 100644
--- a/src/dev/hsa/hw_scheduler.cc
+++ b/src/dev/hsa/hw_scheduler.cc
@@ -84,18 +84,13 @@
 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
                               uint64_t basePointer,
                               uint64_t queue_id,
-                              uint32_t size)
+                              uint32_t size, int doorbellSize)
 {
     assert(queue_id < MAX_ACTIVE_QUEUES);
     // Map queue ID to doorbell.
     // We are only using offset to pio base address as doorbell
// We use the same mapping function used by hsa runtime to do this mapping
-    //
-    // Originally
-    // #define VOID_PTR_ADD32(ptr,n)
-    //     (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
-    // (Addr)VOID_PTR_ADD32(0, queue_id)
-    Addr db_offset = sizeof(uint32_t)*queue_id;
+    Addr db_offset = queue_id * doorbellSize;
     if (dbMap.find(db_offset) != dbMap.end()) {
         panic("Creating an already existing queue (queueID %d)", queue_id);
     }
@@ -318,7 +313,7 @@
 }

 void
-HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
+HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
 {
     auto dbmap_iter = dbMap.find(db_addr);
     if (dbmap_iter == dbMap.end()) {
@@ -335,17 +330,9 @@
 }

 void
-HWScheduler::unregisterQueue(uint64_t queue_id)
+HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
 {
-    // Pointer arithmetic on a null pointer is undefined behavior. Clang
-    // compilers therefore complain if the following reads:
-    // `(Addr)(VOID_PRT_ADD32(0, queue_id))`
-    //
-    // Originally
-    // #define VOID_PTR_ADD32(ptr,n)
-    //     (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
-    // (Addr)VOID_PTR_ADD32(0, queue_id)
-    Addr db_offset = sizeof(uint32_t)*queue_id;
+    Addr db_offset = queue_id * doorbellSize;
     auto dbmap_iter = dbMap.find(db_offset);
     if (dbmap_iter == dbMap.end()) {
         panic("Destroying a non-existing queue (db_offset %x)",
diff --git a/src/dev/hsa/hw_scheduler.hh b/src/dev/hsa/hw_scheduler.hh
index f80e893..c4fe66c 100644
--- a/src/dev/hsa/hw_scheduler.hh
+++ b/src/dev/hsa/hw_scheduler.hh
@@ -52,12 +52,12 @@
                : hsaPP(hsa_pp), nextALId(0), nextRLId(0),
                  wakeupDelay(wakeup_delay), schedWakeupEvent(this)
     {}
-    void write(Addr db_addr, uint32_t doorbell_reg);
+    void write(Addr db_addr, uint64_t doorbell_reg);
     void registerNewQueue(uint64_t hostReadIndexPointer,
                           uint64_t basePointer,
                           uint64_t queue_id,
-                          uint32_t size);
-    void unregisterQueue(uint64_t queue_id);
+                          uint32_t size, int doorbellSize);
+    void unregisterQueue(uint64_t queue_id, int doorbellSize);
     void wakeup();
     void schedWakeup();
     class SchedulerWakeupEvent : public Event
diff --git a/src/gpu-compute/gpu_compute_driver.cc b/src/gpu-compute/gpu_compute_driver.cc
index 2552b76..8c79be3 100644
--- a/src/gpu-compute/gpu_compute_driver.cc
+++ b/src/gpu-compute/gpu_compute_driver.cc
@@ -149,18 +149,22 @@
     TypedBufferArg<kfd_ioctl_create_queue_args> args(ioc_buf);
     args.copyIn(mem_proxy);

-    if ((sizeof(uint32_t) * queueId) > 4096) {
+    if ((doorbellSize() * queueId) > 4096) {
fatal("%s: Exceeded maximum number of HSA queues allowed\n", name());
     }

     args->doorbell_offset = (KFD_MMAP_TYPE_DOORBELL |
         KFD_MMAP_GPU_ID(args->gpu_id)) << PAGE_SHIFT;

+    // for vega offset needs to include exact value of doorbell
+    if (doorbellSize())
+        args->doorbell_offset += queueId * doorbellSize();
+
     args->queue_id = queueId++;
     auto &hsa_pp = device->hsaPacketProc();
     hsa_pp.setDeviceQueueDesc(args->read_pointer_address,
                               args->ring_base_address, args->queue_id,
-                              args->ring_size);
+                              args->ring_size, doorbellSize());
     args.copyOut(mem_proxy);
 }

@@ -243,7 +247,8 @@
             args.copyIn(virt_proxy);
             DPRINTF(GPUDriver, "ioctl: AMDKFD_IOC_DESTROY_QUEUE;" \
                     "queue offset %d\n", args->queue_id);
-            device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id);
+            device->hsaPacketProc().unsetDeviceQueueDesc(args->queue_id,
+                                                         doorbellSize());
           }
           break;
         case AMDKFD_IOC_SET_MEMORY_POLICY:
diff --git a/src/gpu-compute/gpu_compute_driver.hh b/src/gpu-compute/gpu_compute_driver.hh
index ebaab27..922a699 100644
--- a/src/gpu-compute/gpu_compute_driver.hh
+++ b/src/gpu-compute/gpu_compute_driver.hh
@@ -81,6 +81,21 @@
      */
     void setMtype(RequestPtr req);

+    int
+    doorbellSize()
+    {
+        switch (gfxVersion) {
+          case GfxVersion::gfx801:
+          case GfxVersion::gfx803:
+            return 4;
+          case GfxVersion::gfx900:
+            return 8;
+          default:
+            fatal("Invalid GPU type\n");
+        }
+        return 4;
+    }
+
     class DriverWakeupEvent : public Event
     {
       public:



6 is the latest approved patch-set.
No files were changed between the latest approved patch-set and the submitted one.
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/42220
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I48e4e57dc1c80a08133b17cdf3f92533b541f7c3
Gerrit-Change-Number: 42220
Gerrit-PatchSet: 22
Gerrit-Owner: Alex Dutu <alexandru.d...@amd.com>
Gerrit-Reviewer: Matt Sinclair <mattdsincl...@gmail.com>
Gerrit-Reviewer: Matthew Poremba <matthew.pore...@amd.com>
Gerrit-Reviewer: kokoro <noreply+kok...@google.com>
Gerrit-CC: Kyle Roarty <kyleroarty1...@gmail.com>
Gerrit-MessageType: merged
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to