From: Monk Liu <monk....@amd.com>

Using the ctx pointer is not safe, because the pointer is likely to have
already been assigned to another ctx by the time the comparison is done.

fence_context is always increasing and has only a rare chance
of wrapping back around to an already-used number for jobs that
are scheduled to the ring continuously.

Signed-off-by: Monk Liu <Monk.Liu at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c        |  1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c        | 14 +++++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c       |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h       |  2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c         |  2 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c         | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c         | 11 +++++------
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c        |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c         |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c         |  2 +-
 drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c         |  2 +-
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.c |  1 +
 drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  1 +
 15 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2ee99dc..9d54d76 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -283,7 +283,7 @@ struct amdgpu_ring_funcs {
        int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
        /* command emit functions */
        void (*emit_ib)(struct amdgpu_ring *ring,
-                       struct amdgpu_ib *ib);
+                       struct amdgpu_ib *ib, bool ctx_switch);
        void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
                           uint64_t seq, unsigned flags);
        void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
@@ -742,7 +742,6 @@ struct amdgpu_ib {
        struct amdgpu_user_fence        *user;
        unsigned                        vm_id;
        uint64_t                        vm_pd_addr;
-       struct amdgpu_ctx               *ctx;
        uint32_t                        gds_base, gds_size;
        uint32_t                        gws_base, gws_size;
        uint32_t                        oa_base, oa_size;
@@ -805,7 +804,7 @@ struct amdgpu_ring {
        unsigned                wptr_offs;
        unsigned                next_rptr_offs;
        unsigned                fence_offs;
-       struct amdgpu_ctx       *current_ctx;
+       uint64_t last_fence_context;
        enum amdgpu_ring_type   type;
        char                    name[16];
        unsigned                cond_exe_offs;
@@ -1253,6 +1252,7 @@ struct amdgpu_job {
        struct fence            *fence; /* the hw fence */
        uint32_t                num_ibs;
        void                    *owner;
+       uint64_t                        fence_context;
        struct amdgpu_user_fence uf;
 };
 #define to_amdgpu_job(sched_job)               \
@@ -2219,7 +2219,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
-#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
+#define amdgpu_ring_emit_ib(r, ib, f) (r)->funcs->emit_ib((r), (ib), (f))
 #define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
 #define amdgpu_ring_emit_vm_flush(r, vmid, addr) 
(r)->funcs->emit_vm_flush((r), (vmid), (addr))
 #define amdgpu_ring_emit_fence(r, addr, seq, flags) 
(r)->funcs->emit_fence((r), (addr), (seq), (flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1a06596..8c3bf63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -741,7 +741,6 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,

                ib->length_dw = chunk_ib->ib_bytes / 4;
                ib->flags = chunk_ib->flags;
-               ib->ctx = parser->ctx;
                j++;
        }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 0ed6430..1693fc7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -120,7 +120,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
 {
        struct amdgpu_device *adev = ring->adev;
        struct amdgpu_ib *ib = &ibs[0];
-       struct amdgpu_ctx *ctx, *old_ctx;
+       uint64_t fence_context = 0, old = ring->last_fence_context;
        struct fence *hwf;
        struct amdgpu_vm *vm = NULL;
        unsigned i, patch_offset = ~0;
@@ -130,9 +130,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        if (num_ibs == 0)
                return -EINVAL;

-       ctx = ibs->ctx;
-       if (job) /* for domain0 job like ring test, ibs->job is not assigned */
+       if (job) {/* for domain0 job like ring test, ibs->job is not assigned */
                vm = job->vm;
+               fence_context = job->fence_context;
+       }

        if (!ring->ready) {
                dev_err(adev->dev, "couldn't schedule ib\n");
@@ -171,12 +172,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        /* always set cond_exec_polling to CONTINUE */
        *ring->cond_exe_cpu_addr = 1;

-       old_ctx = ring->current_ctx;
        for (i = 0; i < num_ibs; ++i) {
                ib = &ibs[i];
-               amdgpu_ring_emit_ib(ring, ib);
-               ring->current_ctx = ctx;
+               amdgpu_ring_emit_ib(ring, ib, (i == 0 && old != fence_context));
        }
+       ring->last_fence_context = fence_context;

        if (vm) {
                if (ring->funcs->emit_hdp_invalidate)
@@ -186,7 +186,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
        r = amdgpu_fence_emit(ring, &hwf);
        if (r) {
                dev_err(adev->dev, "failed to emit fence (%d)\n", r);
-               ring->current_ctx = old_ctx;
+               ring->last_fence_context = old;
                if (ib->vm_id)
                        amdgpu_vm_reset_id(adev, ib->vm_id);
                amdgpu_ring_undo(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 77c6c08..1fabf15 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -761,7 +761,7 @@ out:
  * @ib: the IB to execute
  *
  */
-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, 
bool ctx_switch)
 {
        amdgpu_ring_write(ring, VCE_CMD_IB);
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index ef99d23..40d0650 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -34,7 +34,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, 
uint32_t handle,
                               bool direct, struct fence **fence);
 void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file 
*filp);
 int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
+void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, 
bool ctx_switch);
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
                                unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 8d69c65..d54fb19 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -210,7 +210,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring 
*ring, uint32_t count)
  * Schedule an IB in the DMA ring (CIK).
  */
 static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
-                          struct amdgpu_ib *ib)
+                          struct amdgpu_ib *ib, bool ctx_switch)
 {
        u32 extra_bits = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 0310890..9813819 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2029,17 +2029,16 @@ static void gfx_v7_0_ring_emit_fence_compute(struct 
amdgpu_ring *ring,
  * on the gfx ring for execution by the GPU.
  */
 static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
-       bool need_ctx_switch = ring->current_ctx != ib->ctx;
        u32 header, control = 0;
        u32 next_rptr = ring->wptr + 5;

        /* drop the CE preamble IB for the same context */
-       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
+       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !ctx_switch)
                return;

-       if (need_ctx_switch)
+       if (ctx_switch)
                next_rptr += 2;

        next_rptr += 4;
@@ -2050,7 +2049,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
        amdgpu_ring_write(ring, next_rptr);

        /* insert SWITCH_BUFFER packet before first IB in the ring frame */
-       if (need_ctx_switch) {
+       if (ctx_switch) {
                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                amdgpu_ring_write(ring, 0);
        }
@@ -2073,7 +2072,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 }

 static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
        u32 header, control = 0;
        u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index a82945f..064f255 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -5644,17 +5644,16 @@ static void gfx_v8_0_ring_emit_hdp_invalidate(struct 
amdgpu_ring *ring)
 }

 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
-       bool need_ctx_switch = ring->current_ctx != ib->ctx;
        u32 header, control = 0;
        u32 next_rptr = ring->wptr + 5;

        /* drop the CE preamble IB for the same context */
-       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
+       if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !ctx_switch)
                return;

-       if (need_ctx_switch)
+       if (ctx_switch)
                next_rptr += 2;

        next_rptr += 4;
@@ -5665,7 +5664,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
        amdgpu_ring_write(ring, next_rptr);

        /* insert SWITCH_BUFFER packet before first IB in the ring frame */
-       if (need_ctx_switch) {
+       if (ctx_switch) {
                amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
                amdgpu_ring_write(ring, 0);
        }
@@ -5688,7 +5687,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring 
*ring,
 }

 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
        u32 header, control = 0;
        u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 27ca46d..abe39024 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -242,7 +242,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring 
*ring, uint32_t count)
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
-                                  struct amdgpu_ib *ib)
+                                  struct amdgpu_ib *ib, bool ctx_switch)
 {
        u32 vmid = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 278b1fe..e222762 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -400,7 +400,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring 
*ring, uint32_t count)
  * Schedule an IB in the DMA ring (VI).
  */
 static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
-                                  struct amdgpu_ib *ib)
+                                  struct amdgpu_ib *ib, bool ctx_switch)
 {
        u32 vmid = ib->vm_id & 0xf;
        u32 next_rptr = ring->wptr + 5;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index abd37a7..b9c2a49 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -489,7 +489,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
        amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0));
        amdgpu_ring_write(ring, ib->gpu_addr);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 1c1a0e2c..465b77c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -539,7 +539,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
        amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index d015cb0..c2f790b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -631,7 +631,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
  * Write ring commands to execute the indirect buffer
  */
 static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
-                                 struct amdgpu_ib *ib)
+                                 struct amdgpu_ib *ib, bool ctx_switch)
 {
        amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0));
        amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index c16248c..a5d736c 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -392,6 +392,7 @@ int amd_sched_job_init(struct amd_sched_job *job,
        kref_init(&job->refcount);
        job->sched = sched;
        job->s_entity = entity;
+       job->fence_context = entity->fence_context;
        job->s_fence = amd_sched_fence_create(entity, owner);
        if (!job->s_fence)
                return -ENOMEM;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 169f70f..5561020 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -87,6 +87,7 @@ struct amd_sched_job {
        struct work_struct             work_free_job;
        struct list_head                           node;
        struct delayed_work work_tdr;
+       uint64_t                fence_context;
        void (*timeout_callback) (struct work_struct *work);
        void (*free_callback)(struct kref *refcount);
 };
-- 
2.5.5

Reply via email to