And store the active request so that we only search for it once; this
applies for reset-engine and full reset.

v2: Check for request completion inside _prepare_engine, don't use
ECANCELED, remove unnecessary null checks (Chris).

v3: Capture active requests during reset_prepare and store it the
engine hangcheck obj.

v4: Rename commit, change i915_gem_reset_request to just confirm the
active_request is still incomplete, instead of engine_stalled (Chris).

Suggested-by: Chris Wilson <ch...@chris-wilson.co.uk>
Signed-off-by: Michel Thierry <michel.thie...@intel.com>

fixes

Signed-off-by: Michel Thierry <michel.thie...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c         | 11 ++++++-----
 drivers/gpu/drm/i915/i915_drv.h         |  3 ++-
 drivers/gpu/drm/i915/i915_gem.c         | 34 +++++++++++++++++++++------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  1 +
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d62793805794..ec719376fc24 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1900,15 +1900,16 @@ int i915_reset_engine(struct intel_engine_cs *engine)
 
        DRM_DEBUG_DRIVER("resetting %s\n", engine->name);
 
-       ret = i915_gem_reset_prepare_engine(engine);
-       if (ret) {
-               DRM_ERROR("Previous reset failed - promote to full reset\n");
+       engine->hangcheck.active_request = 
i915_gem_reset_prepare_engine(engine);
+       if (IS_ERR(engine->hangcheck.active_request)) {
+               DRM_DEBUG_DRIVER("Previous reset failed, promote to full 
reset\n");
+               ret = PTR_ERR(engine->hangcheck.active_request);
                goto out;
        }
 
        /*
-        * the request that caused the hang is stuck on elsp, identify the
-        * active request and drop it, adjust head to skip the offending
+        * the request that caused the hang is stuck on elsp, we know the
+        * active request and can drop it, adjust head to skip the offending
         * request to resume executing remaining requests in the queue.
         */
        i915_gem_reset_engine(engine);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5b9c666b3bf..6cbfeaa02246 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3370,7 +3370,8 @@ static inline u32 i915_reset_count(struct i915_gpu_error 
*error)
        return READ_ONCE(error->reset_count);
 }
 
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine);
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv);
 void i915_gem_reset(struct drm_i915_private *dev_priv);
 void i915_gem_reset_finish_engine(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b5dc073a5ddc..c9f139b322d2 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2793,12 +2793,14 @@ static bool engine_stalled(struct intel_engine_cs 
*engine)
        return true;
 }
 
-/* Ensure irq handler finishes, and not run again. */
-int i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
+/*
+ * Ensure irq handler finishes, and not run again.
+ * Also store the active request so that we only search for it once.
+ */
+struct drm_i915_gem_request *
+i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
 {
-       struct drm_i915_gem_request *request;
-       int err = 0;
-
+       struct drm_i915_gem_request *request = NULL;
 
        /* Prevent the signaler thread from updating the request
         * state (by calling dma_fence_signal) as we are processing
@@ -2827,21 +2829,30 @@ int i915_gem_reset_prepare_engine(struct 
intel_engine_cs *engine)
 
        if (engine_stalled(engine)) {
                request = i915_gem_find_active_request(engine);
+
                if (request && request->fence.error == -EIO)
-                       err = -EIO; /* Previous reset failed! */
+                       return ERR_PTR(-EIO); /* Previous reset failed! */
        }
 
-       return err;
+       return request;
 }
 
 int i915_gem_reset_prepare(struct drm_i915_private *dev_priv)
 {
        struct intel_engine_cs *engine;
+       struct drm_i915_gem_request *request;
        enum intel_engine_id id;
        int err = 0;
 
-       for_each_engine(engine, dev_priv, id)
-               err = i915_gem_reset_prepare_engine(engine);
+       for_each_engine(engine, dev_priv, id) {
+               request = i915_gem_reset_prepare_engine(engine);
+               if (IS_ERR(request)) {
+                       err = PTR_ERR(request);
+                       break;
+               }
+
+               engine->hangcheck.active_request = request;
+       }
 
        i915_gem_revoke_fences(dev_priv);
 
@@ -2894,7 +2905,7 @@ static void engine_skip_context(struct 
drm_i915_gem_request *request)
 static bool i915_gem_reset_request(struct drm_i915_gem_request *request)
 {
        /* Read once and return the resolution */
-       const bool guilty = engine_stalled(request->engine);
+       const bool guilty = !i915_gem_request_completed(request);
 
        /* The guilty request will get skipped on a hung engine.
         *
@@ -2930,9 +2941,8 @@ static bool i915_gem_reset_request(struct 
drm_i915_gem_request *request)
 
 void i915_gem_reset_engine(struct intel_engine_cs *engine)
 {
-       struct drm_i915_gem_request *request;
+       struct drm_i915_gem_request *request = engine->hangcheck.active_request;
 
-       request = i915_gem_find_active_request(engine);
        if (request && i915_gem_reset_request(request)) {
                DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 
0x%x\n",
                                 engine->name, request->global_seqno);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6fde62..f850c4b12337 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -121,6 +121,7 @@ struct intel_engine_hangcheck {
        unsigned long action_timestamp;
        int deadlock;
        struct intel_instdone instdone;
+       struct drm_i915_gem_request *active_request;
        bool stalled;
 };
 
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to