Not needed, at least in the Windows KMD. I don't know the exact reason, but my guess:

1. CI doesn't support world switch, so there is no need for the SWITCH_BUFFER at
the end of the frame (the single SB at the end is only there for world switch).
2. I don't know whether that limit also applies to CI (the limit being that CE
can run at most a certain number of DWs ahead of DE).

So I think CI can keep its original logic.
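
A minimal sketch of why that works out (my illustration, not part of the patch):
the patch only wires up the new .emit_switch_buffer hook for the gfx8 gfx ring,
so the check added at the end of amdgpu_ib_schedule() is simply skipped on CI
and gfx7 keeps emitting exactly what it does today:

    /* tail of amdgpu_ib_schedule() as modified by the patch below */
    ring->current_ctx = ctx;

    /* gfx8 implements .emit_switch_buffer and gets the single
     * SWITCH_BUFFER at the end of the DMA frame; CI/gfx7 leaves the
     * hook NULL, so nothing extra is emitted here for it.
     */
    if (ring->funcs->emit_switch_buffer)
            amdgpu_ring_emit_switch_buffer(ring);
    amdgpu_ring_commit(ring);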

BR Monk


-----Original Message-----
From: Deucher, Alexander 
Sent: Thursday, August 25, 2016 9:33 PM
To: Liu, Monk <monk....@amd.com>; amd-gfx@lists.freedesktop.org
Cc: Liu, Monk <monk....@amd.com>
Subject: RE: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule

> -----Original Message-----
> From: amd-gfx [mailto:amd-gfx-boun...@lists.freedesktop.org] On Behalf 
> Of Monk Liu
> Sent: Thursday, August 25, 2016 1:58 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Liu, Monk
> Subject: [PATCH 1/2] drm/amdgpu:fix gfx ib schedule
> 
> For GFX8 we originally used a double SWITCH_BUFFER to prevent CE from
> running ahead of DE, which avoids VM faults while a VM flush has not
> finished yet. But the double SWITCH_BUFFER costs performance, and world
> switch preemption requires that only one SWITCH_BUFFER sits at the end
> of the DMA frame.
> 
> To prevent CE VM faults without the double SWITCH_BUFFER, insert 128 DWs
> of NOP after the VM flush, because from the hardware's perspective CE can
> run at most 128 DWs ahead of DE.
> 
> Even without a context switch, using SWITCH_BUFFER is still encouraged,
> to get better CE performance.
> 
> Change-Id: Ifdabf23f97e74156c1f66dddd0918ea7ffcddb20
> Signed-off-by: Monk Liu <monk....@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c |  8 ++++++++
>  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c  | 28 +++++++++++++---------------

What about gfx7?  Does that need similar fixes?

Alex

>  3 files changed, 23 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index cb0098a..a935831 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -338,6 +338,7 @@ struct amdgpu_ring_funcs {
>       void (*end_use)(struct amdgpu_ring *ring);
>       void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val);
>       void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset);
> +     void (*emit_switch_buffer) (struct amdgpu_ring *ring);
>  };
> 
>  /*
> @@ -2372,6 +2373,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
>  #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
>  #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v))
>  #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i))
> +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
>  #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
>  #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
>  #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index a31d7ef..4e5b2f3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -210,6 +210,14 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>               amdgpu_ring_patch_cond_exec(ring, patch_offset);
> 
>       ring->current_ctx = ctx;
> +
> +     /* Insert one SB at the end of the DMA frame.
> +      * Currently only the GFX8 IP needs this handling; CI does not
> +      * insert an SB here, it instead inserts a double SWITCH_BUFFER
> +      * after the VM flush and pipeline sync.
> +      */
> +     if (ring->funcs->emit_switch_buffer)
> +             amdgpu_ring_emit_switch_buffer(ring);
>       amdgpu_ring_commit(ring);
>       return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index dfa2288..41a2ef1 100755
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -5936,12 +5936,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
>  {
>       u32 header, control = 0;
> 
> -     /* insert SWITCH_BUFFER packet before first IB in the ring frame */
> -     if (ctx_switch) {
> -             amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -             amdgpu_ring_write(ring, 0);
> -     }
> -
>       if (ib->flags & AMDGPU_IB_FLAG_CE)
>               header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
>       else
> @@ -6013,11 +6007,9 @@ static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
>       amdgpu_ring_write(ring, 4); /* poll interval */
> 
>       if (usepfp) {
> -             /* synce CE with ME to prevent CE fetch CEIB before context switch done */
> -             amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -             amdgpu_ring_write(ring, 0);
> -             amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -             amdgpu_ring_write(ring, 0);
> +             /* sync PFP to ME, otherwise we might get invalid PFP reads */
> +             amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
> +             amdgpu_ring_write(ring, 0x0);
>       }
>  }
> 
> @@ -6065,11 +6057,10 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
>               /* sync PFP to ME, otherwise we might get invalid PFP reads */
>               amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
>               amdgpu_ring_write(ring, 0x0);
> -             amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -             amdgpu_ring_write(ring, 0);
> -             amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> -             amdgpu_ring_write(ring, 0);
>       }
> +
> +     /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
> +     amdgpu_ring_insert_nop(ring, 128);
>  }
> 
>  static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring) 
> @@ -6170,6 +6161,12 @@ static void gfx_v8_0_ring_emit_wreg_kiq(struct amdgpu_ring *ring, u32 idx, u32 v
>       amdgpu_ring_write(ring, val);
>  }
> 
> +static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> +{
> +     amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
> +     amdgpu_ring_write(ring, 0);
> +}
> +
>  static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
>                                                enum amdgpu_interrupt_state state)
>  {
> @@ -6477,6 +6474,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
>       .test_ib = gfx_v8_0_ring_test_ib,
>       .insert_nop = amdgpu_ring_insert_nop,
>       .pad_ib = amdgpu_ring_generic_pad_ib,
> +     .emit_switch_buffer = gfx_v8_ring_emit_sb,
>  };
> 
>  static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
> --
> 1.9.1
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
