Re: [Intel-gfx] [PATCH 19/21] drm/i915/gt: Use indices for writing into relative timelines

2020-12-10 Thread Matthew Brost
On Thu, Dec 10, 2020 at 09:05:44PM +, Chris Wilson wrote:
> Quoting Matthew Brost (2020-12-10 19:16:44)
> > On Thu, Dec 10, 2020 at 08:02:38AM +, Chris Wilson wrote:
> > > Relative timelines are relative to either the global or per-process
> > > HWSP, and so we can replace the absolute addressing with store-index
> > > variants for position invariance.
> > > 
> > 
> > Can you explain the benifit of relative addressing? Why can't we also
> > use absolute? If we can always use absolute, I don't see the point
> > complicating the breadcrumb code.
> 
> It basically allows a third party to move the contexts between hosts
> with far less patching of global state. They want us to avoid all fixed
> GGTT addressing.
> 
> The breadcrumbs themselves do not notice at all, it's just the timeline
> setup and decision to take advantage of the relative commands. The
> breadcrumb patches in this series are some outstanding fixes from ~6
> months ago.

By breadcrumbs, I meant the emit code. Relative addressing for GVT makes
sense.

With that, for this patch:
Reviewed-by: Matthew Brost 

> -Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 19/21] drm/i915/gt: Use indices for writing into relative timelines

2020-12-10 Thread Chris Wilson
Quoting Matthew Brost (2020-12-10 19:16:44)
> On Thu, Dec 10, 2020 at 08:02:38AM +, Chris Wilson wrote:
> > Relative timelines are relative to either the global or per-process
> > HWSP, and so we can replace the absolute addressing with store-index
> > variants for position invariance.
> > 
> 
> Can you explain the benifit of relative addressing? Why can't we also
> use absolute? If we can always use absolute, I don't see the point
> complicating the breadcrumb code.

It basically allows a third party to move the contexts between hosts
with far less patching of global state. They want us to avoid all fixed
GGTT addressing.

The breadcrumbs themselves do not notice at all, it's just the timeline
setup and decision to take advantage of the relative commands. The
breadcrumb patches in this series are some outstanding fixes from ~6
months ago.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 19/21] drm/i915/gt: Use indices for writing into relative timelines

2020-12-10 Thread Matthew Brost
On Thu, Dec 10, 2020 at 08:02:38AM +, Chris Wilson wrote:
> Relative timelines are relative to either the global or per-process
> HWSP, and so we can replace the absolute addressing with store-index
> variants for position invariance.
> 

Can you explain the benifit of relative addressing? Why can't we also
use absolute? If we can always use absolute, I don't see the point
complicating the breadcrumb code.

Matt

> Signed-off-by: Chris Wilson 
> ---
>  drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 98 +---
>  drivers/gpu/drm/i915/gt/intel_timeline.h | 12 +++
>  2 files changed, 82 insertions(+), 28 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
> b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> index ed88dc4de72c..386da26816d0 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
> @@ -502,7 +502,19 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, 
> u32 *cs)
>  
>  static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
>  {
> - return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
> + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
> + unsigned int flags = MI_FLUSH_DW_OP_STOREDW;
> + u32 offset = hwsp_offset(rq);
> +
> + if (intel_timeline_is_relative(tl)) {
> + offset = offset_in_page(offset);
> + flags |= MI_FLUSH_DW_STORE_INDEX;
> + }
> + GEM_BUG_ON(offset & 7);
> + if (intel_timeline_is_global(tl))
> + offset |= MI_FLUSH_DW_USE_GTT;
> +
> + return __gen8_emit_flush_dw(cs, rq->fence.seqno, offset, flags);
>  }
>  
>  u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
> @@ -512,6 +524,18 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request 
> *rq, u32 *cs)
>  
>  u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  {
> + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
> + unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL;
> + u32 offset = hwsp_offset(rq);
> +
> + if (intel_timeline_is_relative(tl)) {
> + offset = offset_in_page(offset);
> + flags |= PIPE_CONTROL_STORE_DATA_INDEX;
> + }
> + GEM_BUG_ON(offset & 7);
> + if (intel_timeline_is_global(tl))
> + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +
>   cs = gen8_emit_pipe_control(cs,
>   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
>   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> @@ -519,26 +543,33 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request 
> *rq, u32 *cs)
>   0);
>  
>   /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
> - cs = gen8_emit_ggtt_write_rcs(cs,
> -   rq->fence.seqno,
> -   hwsp_offset(rq),
> -   PIPE_CONTROL_FLUSH_ENABLE |
> -   PIPE_CONTROL_CS_STALL);
> + cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
>  
>   return gen8_emit_fini_breadcrumb_tail(rq, cs);
>  }
>  
>  u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  {
> - cs = gen8_emit_ggtt_write_rcs(cs,
> -   rq->fence.seqno,
> -   hwsp_offset(rq),
> -   PIPE_CONTROL_CS_STALL |
> -   PIPE_CONTROL_TILE_CACHE_FLUSH |
> -   PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> -   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> -   PIPE_CONTROL_DC_FLUSH_ENABLE |
> -   PIPE_CONTROL_FLUSH_ENABLE);
> + struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
> + u32 offset = hwsp_offset(rq);
> + unsigned int flags;
> +
> + flags = (PIPE_CONTROL_CS_STALL |
> +  PIPE_CONTROL_TILE_CACHE_FLUSH |
> +  PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
> +  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
> +  PIPE_CONTROL_DC_FLUSH_ENABLE |
> +  PIPE_CONTROL_FLUSH_ENABLE);
> +
> + if (intel_timeline_is_relative(tl)) {
> + offset = offset_in_page(offset);
> + flags |= PIPE_CONTROL_STORE_DATA_INDEX;
> + }
> + GEM_BUG_ON(offset & 7);
> + if (intel_timeline_is_global(tl))
> + flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
> +
> + cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
>  
>   return gen8_emit_fini_breadcrumb_tail(rq, cs);
>  }
> @@ -601,19 +632,30 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request 
> *rq, u32 *cs)
>  
>  u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
>  {
> - cs = gen12_emit_ggtt_write_rcs(cs,
> -  

[Intel-gfx] [PATCH 19/21] drm/i915/gt: Use indices for writing into relative timelines

2020-12-10 Thread Chris Wilson
Relative timelines are relative to either the global or per-process
HWSP, and so we can replace the absolute addressing with store-index
variants for position invariance.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/gt/gen8_engine_cs.c | 98 +---
 drivers/gpu/drm/i915/gt/intel_timeline.h | 12 +++
 2 files changed, 82 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c 
b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index ed88dc4de72c..386da26816d0 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -502,7 +502,19 @@ gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, 
u32 *cs)
 
 static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs)
 {
-   return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0);
+   struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+   unsigned int flags = MI_FLUSH_DW_OP_STOREDW;
+   u32 offset = hwsp_offset(rq);
+
+   if (intel_timeline_is_relative(tl)) {
+   offset = offset_in_page(offset);
+   flags |= MI_FLUSH_DW_STORE_INDEX;
+   }
+   GEM_BUG_ON(offset & 7);
+   if (intel_timeline_is_global(tl))
+   offset |= MI_FLUSH_DW_USE_GTT;
+
+   return __gen8_emit_flush_dw(cs, rq->fence.seqno, offset, flags);
 }
 
 u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs)
@@ -512,6 +524,18 @@ u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request 
*rq, u32 *cs)
 
 u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
+   struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+   unsigned int flags = PIPE_CONTROL_FLUSH_ENABLE | PIPE_CONTROL_CS_STALL;
+   u32 offset = hwsp_offset(rq);
+
+   if (intel_timeline_is_relative(tl)) {
+   offset = offset_in_page(offset);
+   flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+   }
+   GEM_BUG_ON(offset & 7);
+   if (intel_timeline_is_global(tl))
+   flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
cs = gen8_emit_pipe_control(cs,
PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
@@ -519,26 +543,33 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request 
*rq, u32 *cs)
0);
 
/* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */
-   cs = gen8_emit_ggtt_write_rcs(cs,
- rq->fence.seqno,
- hwsp_offset(rq),
- PIPE_CONTROL_FLUSH_ENABLE |
- PIPE_CONTROL_CS_STALL);
+   cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
 
return gen8_emit_fini_breadcrumb_tail(rq, cs);
 }
 
 u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
-   cs = gen8_emit_ggtt_write_rcs(cs,
- rq->fence.seqno,
- hwsp_offset(rq),
- PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_TILE_CACHE_FLUSH |
- PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
- PIPE_CONTROL_DEPTH_CACHE_FLUSH |
- PIPE_CONTROL_DC_FLUSH_ENABLE |
- PIPE_CONTROL_FLUSH_ENABLE);
+   struct intel_timeline *tl = rcu_dereference_protected(rq->timeline, 1);
+   u32 offset = hwsp_offset(rq);
+   unsigned int flags;
+
+   flags = (PIPE_CONTROL_CS_STALL |
+PIPE_CONTROL_TILE_CACHE_FLUSH |
+PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH |
+PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+PIPE_CONTROL_DC_FLUSH_ENABLE |
+PIPE_CONTROL_FLUSH_ENABLE);
+
+   if (intel_timeline_is_relative(tl)) {
+   offset = offset_in_page(offset);
+   flags |= PIPE_CONTROL_STORE_DATA_INDEX;
+   }
+   GEM_BUG_ON(offset & 7);
+   if (intel_timeline_is_global(tl))
+   flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
+
+   cs = __gen8_emit_write_rcs(cs, rq->fence.seqno, offset, 0, flags);
 
return gen8_emit_fini_breadcrumb_tail(rq, cs);
 }
@@ -601,19 +632,30 @@ u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request 
*rq, u32 *cs)
 
 u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs)
 {
-   cs = gen12_emit_ggtt_write_rcs(cs,
-  rq->fence.seqno,
-  hwsp_offset(rq),
-  PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
-  PIPE_CONTROL_CS_STALL |
-  PIPE_CONTROL_TILE_CACHE_FLUSH |
-