As a precaution against the driver fouling up and missing a hang, leaving
the caller in an indefinite wait, manually inspect for a GPU hang if we
time out whilst waiting for a seqno.

v2: To avoid issues with multiple clients running hangchecks
concurrently or in very rapid succession, make sure we only reactivate
the hangcheck timer if we find it idle whilst waiting.

Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_gem.c |   13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 0d878c1..dc382eb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1096,7 +1096,6 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
        struct timespec before, now, wait_time={1,0};
        unsigned long timeout_jiffies;
        long end;
-       bool wait_forever = true;
        int ret;
 
        if (i915_seqno_passed(ring->get_seqno(ring, true), seqno))
@@ -1104,10 +1103,8 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
 
        trace_i915_gem_request_wait_begin(ring, seqno);
 
-       if (timeout != NULL) {
+       if (timeout != NULL)
                wait_time = *timeout;
-               wait_forever = false;
-       }
 
        timeout_jiffies = timespec_to_jiffies(&wait_time);
 
@@ -1129,6 +1126,12 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
                else
                        end = wait_event_timeout(ring->irq_queue, EXIT_COND,
                                                 timeout_jiffies);
+               /* Be paranoid and check that we haven't missed a GPU hang */
+               if (end == 0 &&
+                   i915_enable_hangcheck &&
+                   !timer_pending(&dev_priv->gpu_error.hangcheck_timer))
+                       mod_timer(&dev_priv->gpu_error.hangcheck_timer,
+                                 round_jiffies_up(jiffies + DRM_I915_HANGCHECK_JIFFIES));
 
                /* We need to check whether any gpu reset happened in between
                 * the caller grabbing the seqno and now ... */
@@ -1140,7 +1143,7 @@ static int __wait_seqno(struct intel_ring_buffer *ring, u32 seqno,
                ret = i915_gem_check_wedge(&dev_priv->gpu_error, interruptible);
                if (ret)
                        end = ret;
-       } while (end == 0 && wait_forever);
+       } while (end == 0 && timeout == NULL);
 
        getrawmonotonic(&now);
 
-- 
1.7.10.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to