From: Alex Deucher <alexander.deuc...@amd.com>

Helpful for debugging GPUVM errors, as the log now shows which
hw block and page generated the fault.

v2: simplify fault decoding

Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/radeon/si.c  |  272 +++++++++++++++++++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/sid.h |   14 ++
 2 files changed, 284 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index f305768..d3f0507 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -4390,6 +4390,270 @@ void si_vm_fini(struct radeon_device *rdev)
 }

 /**
+ * si_vm_decode_fault - print human readable fault info
+ *
+ * @rdev: radeon_device pointer
+ * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
+ * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
+ *
+ * Log protections, vmid, page, access type and client block of a VM fault (SI).
+ */
+static void si_vm_decode_fault(struct radeon_device *rdev,
+                              u32 status, u32 addr)
+{
+       u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
+       u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
+       u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
+       const char *block;      /* name of the hw block mapped from mc_id */
+
+       if (rdev->family == CHIP_TAHITI) {      /* Tahiti has its own id table */
+               switch (mc_id) {
+               case 160:
+               case 144:
+               case 96:
+               case 80:
+               case 224:
+               case 208:
+               case 32:
+               case 16:
+                       block = "CB";
+                       break;
+               case 161:
+               case 145:
+               case 97:
+               case 81:
+               case 225:
+               case 209:
+               case 33:
+               case 17:
+                       block = "CB_FMASK";
+                       break;
+               case 162:
+               case 146:
+               case 98:
+               case 82:
+               case 226:
+               case 210:
+               case 34:
+               case 18:
+                       block = "CB_CMASK";
+                       break;
+               case 163:
+               case 147:
+               case 99:
+               case 83:
+               case 227:
+               case 211:
+               case 35:
+               case 19:
+                       block = "CB_IMMED";
+                       break;
+               case 164:
+               case 148:
+               case 100:
+               case 84:
+               case 228:
+               case 212:
+               case 36:
+               case 20:
+                       block = "DB";
+                       break;
+               case 165:
+               case 149:
+               case 101:
+               case 85:
+               case 229:
+               case 213:
+               case 37:
+               case 21:
+                       block = "DB_HTILE";
+                       break;
+               case 167:
+               case 151:
+               case 103:
+               case 87:
+               case 231:
+               case 215:
+               case 39:
+               case 23:
+                       block = "DB_STEN";
+                       break;
+               case 72:
+               case 68:
+               case 64:
+               case 8:
+               case 4:
+               case 0:
+               case 136:
+               case 132:
+               case 128:
+               case 200:
+               case 196:
+               case 192:
+                       block = "TC";
+                       break;
+               case 112:
+               case 48:
+                       block = "CP";
+                       break;
+               case 49:
+               case 177:
+               case 50:
+               case 178:
+                       block = "SH";
+                       break;
+               case 53:
+               case 190:
+                       block = "VGT";
+                       break;
+               case 117:
+                       block = "IH";
+                       break;
+               case 51:
+               case 115:
+                       block = "RLC";
+                       break;
+               case 119:
+               case 183:
+                       block = "DMA0";
+                       break;
+               case 61:
+                       block = "DMA1";
+                       break;
+               case 248:
+               case 120:
+                       block = "HDP";
+                       break;
+               default:
+                       block = "unknown";      /* id not in the Tahiti table */
+                       break;
+               }
+       } else {        /* table used for every non-Tahiti family */
+               switch (mc_id) {
+               case 32:
+               case 16:
+               case 96:
+               case 80:
+               case 160:
+               case 144:
+               case 224:
+               case 208:
+                       block = "CB";
+                       break;
+               case 33:
+               case 17:
+               case 97:
+               case 81:
+               case 161:
+               case 145:
+               case 225:
+               case 209:
+                       block = "CB_FMASK";
+                       break;
+               case 34:
+               case 18:
+               case 98:
+               case 82:
+               case 162:
+               case 146:
+               case 226:
+               case 210:
+                       block = "CB_CMASK";
+                       break;
+               case 35:
+               case 19:
+               case 99:
+               case 83:
+               case 163:
+               case 147:
+               case 227:
+               case 211:
+                       block = "CB_IMMED";
+                       break;
+               case 36:
+               case 20:
+               case 100:
+               case 84:
+               case 164:
+               case 148:
+               case 228:
+               case 212:
+                       block = "DB";
+                       break;
+               case 37:
+               case 21:
+               case 101:
+               case 85:
+               case 165:
+               case 149:
+               case 229:
+               case 213:
+                       block = "DB_HTILE";
+                       break;
+               case 39:
+               case 23:
+               case 103:
+               case 87:
+               case 167:
+               case 151:
+               case 231:
+               case 215:
+                       block = "DB_STEN";
+                       break;
+               case 72:
+               case 68:
+               case 8:
+               case 4:
+               case 136:
+               case 132:
+               case 200:
+               case 196:
+                       block = "TC";
+                       break;
+               case 112:
+               case 48:
+                       block = "CP";
+                       break;
+               case 49:
+               case 177:
+               case 50:
+               case 178:
+                       block = "SH";
+                       break;
+               case 53:
+                       block = "VGT";
+                       break;
+               case 117:
+                       block = "IH";
+                       break;
+               case 51:
+               case 115:
+                       block = "RLC";
+                       break;
+               case 119:
+               case 183:
+                       block = "DMA0";
+                       break;
+               case 61:
+                       block = "DMA1";
+                       break;
+               case 248:
+               case 120:
+                       block = "HDP";
+                       break;
+               default:
+                       block = "unknown";      /* id not in this table */
+                       break;
+               }
+       }
+
+       dev_err(rdev->dev, "VM fault (0x%02x, vmid %u) at page %u, %s from %s (%u)\n",
+               protections, vmid, addr,
+               (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
+               block, mc_id);
+}
+
+/**
  * si_vm_set_page - update the page tables using the CP
  *
  * @rdev: radeon_device pointer
@@ -5755,6 +6019,7 @@ int si_irq_process(struct radeon_device *rdev)
        u32 ring_index;
        bool queue_hotplug = false;
        bool queue_thermal = false;
+       u32 status, addr;

        if (!rdev->ih.enabled || rdev->shutdown)
                return IRQ_NONE;
@@ -5990,11 +6255,14 @@ restart_ih:
                        break;
                case 146:
                case 147:
+                       addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
+                       status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
                        dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", 
src_id, src_data);
                        dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR 
  0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
+                               addr);
                        dev_err(rdev->dev, "  
VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
-                               RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
+                               status);
+                       si_vm_decode_fault(rdev, status, addr);
                        /* reset addr and status */
                        WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
                        break;
diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
index 12a20eb..2c8da27 100644
--- a/drivers/gpu/drm/radeon/sid.h
+++ b/drivers/gpu/drm/radeon/sid.h
@@ -367,6 +367,20 @@

 #define        VM_CONTEXT1_PROTECTION_FAULT_ADDR               0x14FC
 #define        VM_CONTEXT1_PROTECTION_FAULT_STATUS             0x14DC
+#define                PROTECTIONS_MASK                        (0xf << 0)
+#define                PROTECTIONS_SHIFT                       0
+               /* Protection fault bits of the status register:
+                * bit 0: range
+                * bit 1: pde0
+                * bit 2: valid
+                * bit 3: read
+                * bit 4: write -- NOTE(review): outside the 4-bit PROTECTIONS_MASK; confirm
+                */
+#define                MEMORY_CLIENT_ID_MASK                   (0xff << 12) /* id of the faulting memory client */
+#define                MEMORY_CLIENT_ID_SHIFT                  12
+#define                MEMORY_CLIENT_RW_MASK                   (1 << 24) /* set: write access, clear: read access */
+#define                MEMORY_CLIENT_RW_SHIFT                  24
+#define                FAULT_VMID_MASK                         (0xf << 25) /* VM id the fault is reported for */
+#define                FAULT_VMID_SHIFT                        25

 #define VM_INVALIDATE_REQUEST                          0x1478
 #define VM_INVALIDATE_RESPONSE                         0x147c
-- 
1.7.7.5

Reply via email to