From: Christian König <christian.koe...@amd.com>

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
    Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
    amdgpu_cs_pass1()
    Only initialize shadow on first use
    (Alex)
v4: Pass parameters rather than job to new ring callback (Alex)

Signed-off-by: Christian König <christian.koe...@amd.com>
Signed-off-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 26 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 17 +++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  3 +++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..50cc117bc628 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
                case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
                case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+               case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
                        break;
 
                default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
        return 0;
 }
 
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+                              struct amdgpu_cs_chunk *chunk)
+{
+       struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+       int i;
+
+       if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+               return -EINVAL;
+
+       for (i = 0; i < p->gang_size; ++i) {
+               p->jobs[i]->shadow_va = shadow->shadow_va;
+               p->jobs[i]->csa_va = shadow->csa_va;
+               p->jobs[i]->gds_va = shadow->gds_va;
+               p->jobs[i]->init_shadow =
+                       shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+       }
+
+       return 0;
+}
+
 static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 {
        unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
                        if (r)
                                return r;
                        break;
+               case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+                       r = amdgpu_cs_p2_shadow(p, chunk);
+                       if (r)
+                               return r;
+                       break;
                }
        }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bcccc348dbe2..7fb8e6691d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        uint64_t fence_ctx;
        uint32_t status = 0, alloc_size;
        unsigned fence_flags = 0;
-       bool secure;
+       bool secure, init_shadow;
+       u64 shadow_va, csa_va, gds_va;
+       int vmid = AMDGPU_JOB_GET_VMID(job);
 
        unsigned i;
        int r = 0;
@@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
                vm = job->vm;
                fence_ctx = job->base.s_fence ?
                        job->base.s_fence->scheduled.context : 0;
+               shadow_va = job->shadow_va;
+               csa_va = job->csa_va;
+               gds_va = job->gds_va;
+               init_shadow = job->init_shadow;
        } else {
                vm = NULL;
                fence_ctx = 0;
+               shadow_va = 0;
+               csa_va = 0;
+               gds_va = 0;
+               init_shadow = false;
        }
 
        if (!ring->sched.ready && !ring->is_mes_queue) {
@@ -212,6 +222,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
        }
 
        amdgpu_ring_ib_begin(ring);
+
+       if (job && ring->funcs->emit_gfx_shadow)
+               amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+                                           init_shadow, vmid);
+
        if (job && ring->funcs->init_cond_exec)
                patch_offset = amdgpu_ring_init_cond_exec(ring);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
        uint64_t                uf_addr;
        uint64_t                uf_sequence;
 
+       /* virtual addresses for shadow/GDS/CSA */
+       uint64_t                shadow_va;
+       uint64_t                csa_va;
+       uint64_t                gds_va;
+       bool                    init_shadow;
+
        /* job_run_counter >= 1 means a resubmit job */
        uint32_t                job_run_counter;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3989e755a5b4..7942cb62e52c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -212,6 +212,8 @@ struct amdgpu_ring_funcs {
        void (*end_use)(struct amdgpu_ring *ring);
        void (*emit_switch_buffer) (struct amdgpu_ring *ring);
        void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+       void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va,
+                               u64 gds_va, bool init_shadow, int vmid);
        void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
                          uint32_t reg_val_offs);
        void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -307,6 +309,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
 #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) (r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v))
 #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
 #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
 #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))
-- 
2.39.2

Reply via email to