[PATCH] drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues

Marek Olšák Mon, 21 Jan 2019 15:47:02 -0800

From: Marek Olšák <marek.ol...@amd.com>

I'm not increasing the DRM version because GDS isn't totally without bugs yet.


Signed-off-by: Marek Olšák <marek.ol...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 17 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 17 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c   | 36 +++++++++++++++++++++++++
 include/uapi/drm/amdgpu_drm.h           |  5 ++++
 5 files changed, 77 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index ecbcefe49a98..f89f5734d985 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -30,20 +30,22 @@ struct amdgpu_bo;
 struct amdgpu_gds_asic_info {
        uint32_t        total_size;
        uint32_t        gfx_partition_size;
        uint32_t        cs_partition_size;
 };
 
 struct amdgpu_gds {
        struct amdgpu_gds_asic_info     mem;
        struct amdgpu_gds_asic_info     gws;
        struct amdgpu_gds_asic_info     oa;
+       uint32_t                        gds_compute_max_wave_id;
+
        /* At present, GDS, GWS and OA resources for gfx (graphics)
         * is always pre-allocated and available for graphics operation.
         * Such resource is shared between all gfx clients.
         * TODO: move this operation to user space
         * */
        struct amdgpu_bo*               gds_gfx_bo;
        struct amdgpu_bo*               gws_gfx_bo;
        struct amdgpu_bo*               oa_gfx_bo;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7984292f9282..d971ea914755 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2257,20 +2257,36 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_job *job,
                                          struct amdgpu_ib *ib,
                                          uint32_t flags)
 {
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+       /* Currently, there is a high possibility to get wave ID mismatch
+        * between ME and GDS, leading to a hw deadlock, because ME generates
+        * different wave IDs than the GDS expects. This situation happens
+        * randomly when at least 5 compute pipes use GDS ordered append.
+        * The wave IDs generated by ME are also wrong after suspend/resume.
+        * Those are probably bugs somewhere else in the kernel driver.
+        *
+        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+        * GDS to 0 for this ring (me/pipe).
+        */
+       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+               amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+       }
+
        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
                                          (2 << 0) |
 #endif
                                          (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
 }
 
@@ -5050,20 +5066,21 @@ static void gfx_v7_0_set_irq_funcs(struct amdgpu_device *adev)
        adev->gfx.priv_inst_irq.num_types = 1;
        adev->gfx.priv_inst_irq.funcs = &gfx_v7_0_priv_inst_irq_funcs;
 }
 
 static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
 {
        /* init asci gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;
+       adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;
 
                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;
 
                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a26747681ed6..dcdae74fc0e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6077,20 +6077,36 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_job *job,
                                          struct amdgpu_ib *ib,
                                          uint32_t flags)
 {
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+       /* Currently, there is a high possibility to get wave ID mismatch
+        * between ME and GDS, leading to a hw deadlock, because ME generates
+        * different wave IDs than the GDS expects. This situation happens
+        * randomly when at least 5 compute pipes use GDS ordered append.
+        * The wave IDs generated by ME are also wrong after suspend/resume.
+        * Those are probably bugs somewhere else in the kernel driver.
+        *
+        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+        * GDS to 0 for this ring (me/pipe).
+        */
+       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
+               amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+       }
+
        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
                                (2 << 0) |
 #endif
                                (ib->gpu_addr & 0xFFFFFFFC));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
        amdgpu_ring_write(ring, control);
 }
 
@@ -6989,20 +7005,21 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
        adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 }
 
 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 {
        /* init asci gds info */
        adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;
+       adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
 
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;
 
                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;
 
                adev->gds.oa.gfx_partition_size = 4;
                adev->gds.oa.cs_partition_size = 1;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 262ee3cf6f1c..63b898fc0467 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -4003,20 +4003,36 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 }
 
 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
                                          struct amdgpu_job *job,
                                          struct amdgpu_ib *ib,
                                          uint32_t flags)
 {
        unsigned vmid = AMDGPU_JOB_GET_VMID(job);
        u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
 
+       /* Currently, there is a high possibility to get wave ID mismatch
+        * between ME and GDS, leading to a hw deadlock, because ME generates
+        * different wave IDs than the GDS expects. This situation happens
+        * randomly when at least 5 compute pipes use GDS ordered append.
+        * The wave IDs generated by ME are also wrong after suspend/resume.
+        * Those are probably bugs somewhere else in the kernel driver.
+        *
+        * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
+        * GDS to 0 for this ring (me/pipe).
+        */
+       if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
+               amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
+               amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
+               amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+       }
+
        amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
        amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
                                (2 << 0) |
 #endif
                                lower_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
        amdgpu_ring_write(ring, control);
 }
@@ -4839,20 +4855,40 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
                adev->gds.mem.total_size = 0x10000;
                break;
        case CHIP_RAVEN:
                adev->gds.mem.total_size = 0x1000;
                break;
        default:
                adev->gds.mem.total_size = 0x10000;
                break;
        }
 
+       switch (adev->asic_type) {
+       case CHIP_VEGA10:
+       case CHIP_VEGA20:
+               adev->gds.gds_compute_max_wave_id = 0x7ff;
+               break;
+       case CHIP_VEGA12:
+               adev->gds.gds_compute_max_wave_id = 0x27f;
+               break;
+       case CHIP_RAVEN:
+               if (adev->rev_id >= 0x8)
+                       adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
+               else
+                       adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
+               break;
+       default:
+               /* this really depends on the chip */
+               adev->gds.gds_compute_max_wave_id = 0x7ff;
+               break;
+       }
+
        adev->gds.gws.total_size = 64;
        adev->gds.oa.total_size = 16;
 
        if (adev->gds.mem.total_size == 64 * 1024) {
                adev->gds.mem.gfx_partition_size = 4096;
                adev->gds.mem.cs_partition_size = 4096;
 
                adev->gds.gws.gfx_partition_size = 4;
                adev->gds.gws.cs_partition_size = 4;
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index faaad04814e4..662d379ea624 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -561,20 +561,25 @@ union drm_amdgpu_cs {
 /* Preamble flag, which means the IB could be dropped if no context switch */
 #define AMDGPU_IB_FLAG_PREAMBLE (1<<1)
 
 /* Preempt flag, IB should set Pre_enb bit if PREEMPT flag detected */
 #define AMDGPU_IB_FLAG_PREEMPT (1<<2)
 
 /* The IB fence should do the L2 writeback but not invalidate any shader
  * caches (L2/vL1/sL1/I$). */
 #define AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE (1 << 3)
 
+/* Set GDS_COMPUTE_MAX_WAVE_ID = DEFAULT before PACKET3_INDIRECT_BUFFER.
+ * This will reset wave ID counters for the IB.
+ */
+#define AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID (1 << 4)
+
 struct drm_amdgpu_cs_chunk_ib {
        __u32 _pad;
        /** AMDGPU_IB_FLAG_* */
        __u32 flags;
        /** Virtual address to begin IB execution */
        __u64 va_start;
        /** Size of submission */
        __u32 ib_bytes;
        /** HW IP to submit to */
        __u32 ip_type;
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: add a workaround for GDS ordered append hangs with compute queues

Reply via email to