Re: [Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory
Quoting Joonas Lahtinen (2018-06-13 13:54:01) > Quoting Chris Wilson (2018-06-12 13:51:35) > > For symmetry, simplicity and ensuring the request is always truly idle > > upon its completion, always emit the closing flush prior to emitting the > > request breadcrumb. Previously, we would only emit the flush if we had > > started a user batch, but this just leaves all the other paths open to > > speculation (do they affect the GPU caches or not?) With mm switching, a > > key requirement is that the GPU is flushed and invalidated before hand, > > so for absolute safety, we want that closing flush be mandatory. > > > > Signed-off-by: Chris Wilson > > No word about perf impact? The patch description matches what it does. > > Assuming we're not murdering any testcases; > > Reviewed-by: Joonas Lahtinen No magpies, thanks for the review. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory
Quoting Joonas Lahtinen (2018-06-13 13:54:01) > Quoting Chris Wilson (2018-06-12 13:51:35) > > For symmetry, simplicity and ensuring the request is always truly idle > > upon its completion, always emit the closing flush prior to emitting the > > request breadcrumb. Previously, we would only emit the flush if we had > > started a user batch, but this just leaves all the other paths open to > > speculation (do they affect the GPU caches or not?) With mm switching, a > > key requirement is that the GPU is flushed and invalidated before hand, > > so for absolute safety, we want that closing flush be mandatory. > > > > Signed-off-by: Chris Wilson > > No word about perf impact? The patch description matches what it does. There's no change to the normal execution paths. There may be a change on interrupted execution paths, but they are already returning to userspace empty handed and not as relevant to performance. We are just making sure the odd ones don't have any odd side effects. -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory
Quoting Chris Wilson (2018-06-12 13:51:35) > For symmetry, simplicity and ensuring the request is always truly idle > upon its completion, always emit the closing flush prior to emitting the > request breadcrumb. Previously, we would only emit the flush if we had > started a user batch, but this just leaves all the other paths open to > speculation (do they affect the GPU caches or not?) With mm switching, a > key requirement is that the GPU is flushed and invalidated before hand, > so for absolute safety, we want that closing flush be mandatory. > > Signed-off-by: Chris Wilson No word about perf impact? The patch description matches what it does. Assuming we're not murdering any testcases; Reviewed-by: Joonas Lahtinen Regards, Joonas > --- > drivers/gpu/drm/i915/i915_gem.c| 4 ++-- > drivers/gpu/drm/i915/i915_gem_context.c| 9 + > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- > drivers/gpu/drm/i915/i915_request.c| 18 ++ > drivers/gpu/drm/i915/i915_request.h| 4 +--- > drivers/gpu/drm/i915/selftests/huge_pages.c| 2 +- > .../drm/i915/selftests/i915_gem_coherency.c| 4 ++-- > .../gpu/drm/i915/selftests/i915_gem_context.c | 4 ++-- > drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- > .../gpu/drm/i915/selftests/intel_hangcheck.c | 16 > drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- > .../gpu/drm/i915/selftests/intel_workarounds.c | 2 +- > 12 files changed, 24 insertions(+), 47 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 93efd92362db..8dd4d35655af 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, > rq = i915_request_alloc(engine, > dev_priv->kernel_context); > if (!IS_ERR(rq)) > - __i915_request_add(rq, false); > + i915_request_add(rq); > } > } > > @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct > drm_i915_private *i915) > if (engine->init_context) > err = 
engine->init_context(rq); > > - __i915_request_add(rq, true); > + i915_request_add(rq); > if (err) > goto err_active; > } > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c > b/drivers/gpu/drm/i915/i915_gem_context.c > index b2c7ac1b074d..ef6ea4bcd773 100644 > --- a/drivers/gpu/drm/i915/i915_gem_context.c > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct > drm_i915_private *i915) > i915_timeline_sync_set(rq->timeline, &prev->fence); > } > > - /* -* Force a flush after the switch to ensure that all rendering -* and operations prior to switching to the kernel context > hits -* memory. This should be guaranteed by the previous request, -* but an extra layer of paranoia before we declare the system -* idle (on suspend etc) is advisable! -*/ - __i915_request_add(rq, true); > + i915_request_add(rq); > } > > return 0; > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > index 2d2eb3075960..60dc2a865f5f 100644 > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c > @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) > i915_gem_object_unpin_map(cache->rq->batch->obj); > i915_gem_chipset_flush(cache->rq->i915); > > - __i915_request_add(cache->rq, true); > + i915_request_add(cache->rq); > cache->rq = NULL; > } > > @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, > trace_i915_request_queue(eb.request, eb.batch_flags); > err = eb_submit(&eb); > err_request: > - __i915_request_add(eb.request, err == 0); > + i915_request_add(eb.request); > add_to_client(eb.request, file); > > if (fences) > diff --git a/drivers/gpu/drm/i915/i915_request.c > b/drivers/gpu/drm/i915/i915_request.c > index 9092f5464c24..e1dbb544046f 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -1018,14 +1018,13 @@ i915_request_await_object(struct
i915_request *to, > * request is not being tracked for completion but the work itself is > * going to happen on the hardware. This would be a Bad Thing(tm). > */ > -void __i915_request_add(struct i915_request *request, bool flush_caches) > +void i915_request_add(struct
[Intel-gfx] [PATCH] drm/i915: Make closing request flush mandatory
For symmetry, simplicity and ensuring the request is always truly idle upon its completion, always emit the closing flush prior to emitting the request breadcrumb. Previously, we would only emit the flush if we had started a user batch, but this just leaves all the other paths open to speculation (do they affect the GPU caches or not?) With mm switching, a key requirement is that the GPU is flushed and invalidated before hand, so for absolute safety, we want that closing flush be mandatory. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c| 4 ++-- drivers/gpu/drm/i915/i915_gem_context.c| 9 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 ++-- drivers/gpu/drm/i915/i915_request.c| 18 ++ drivers/gpu/drm/i915/i915_request.h| 4 +--- drivers/gpu/drm/i915/selftests/huge_pages.c| 2 +- .../drm/i915/selftests/i915_gem_coherency.c| 4 ++-- .../gpu/drm/i915/selftests/i915_gem_context.c | 4 ++-- drivers/gpu/drm/i915/selftests/i915_request.c | 2 +- .../gpu/drm/i915/selftests/intel_hangcheck.c | 16 drivers/gpu/drm/i915/selftests/intel_lrc.c | 2 +- .../gpu/drm/i915/selftests/intel_workarounds.c | 2 +- 12 files changed, 24 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 93efd92362db..8dd4d35655af 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3213,7 +3213,7 @@ void i915_gem_reset(struct drm_i915_private *dev_priv, rq = i915_request_alloc(engine, dev_priv->kernel_context); if (!IS_ERR(rq)) - __i915_request_add(rq, false); + i915_request_add(rq); } } @@ -5332,7 +5332,7 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915) if (engine->init_context) err = engine->init_context(rq); - __i915_request_add(rq, true); + i915_request_add(rq); if (err) goto err_active; } diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index b2c7ac1b074d..ef6ea4bcd773 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ 
b/drivers/gpu/drm/i915/i915_gem_context.c @@ -700,14 +700,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *i915) i915_timeline_sync_set(rq->timeline, &prev->fence); } - /* -* Force a flush after the switch to ensure that all rendering -* and operations prior to switching to the kernel context hits -* memory. This should be guaranteed by the previous request, -* but an extra layer of paranoia before we declare the system -* idle (on suspend etc) is advisable! -*/ - __i915_request_add(rq, true); + i915_request_add(rq); } return 0; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 2d2eb3075960..60dc2a865f5f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -921,7 +921,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache) i915_gem_object_unpin_map(cache->rq->batch->obj); i915_gem_chipset_flush(cache->rq->i915); - __i915_request_add(cache->rq, true); + i915_request_add(cache->rq); cache->rq = NULL; } @@ -2438,7 +2438,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, trace_i915_request_queue(eb.request, eb.batch_flags); err = eb_submit(&eb); err_request: - __i915_request_add(eb.request, err == 0); + i915_request_add(eb.request); add_to_client(eb.request, file); if (fences) diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 9092f5464c24..e1dbb544046f 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1018,14 +1018,13 @@ i915_request_await_object(struct i915_request *to, * request is not being tracked for completion but the work itself is * going to happen on the hardware. This would be a Bad Thing(tm).
*/ -void __i915_request_add(struct i915_request *request, bool flush_caches) +void i915_request_add(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; struct i915_timeline *timeline = request->timeline; struct intel_ring *ring = request->ring; struct i915_request *prev; u32 *cs; - int err; GEM_TRACE("%s fence %llx:%d\n", engine->name, request->fence.context, request->fence.seqno); @@ -1046,20 +1045,7 @@ void