On Gen6+ we have other rings which may be in use. The GPU has not hung if the blit or media ring is still making progress.
Signed-off-by: Ben Widawsky <[email protected]> --- drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_drv.h | 9 +-- drivers/gpu/drm/i915/i915_irq.c | 146 ++++++++++++++++++++++++----------- 3 files changed, 107 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 1f02971..c00dee5 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -755,20 +755,20 @@ static int i915_error_state(struct seq_file *m, void *unused) seq_printf(m, "Blitter Page Fault: 0x%08x\n", error->page_fault[BCS]); seq_printf(m, "ERROR: 0x%08x\n", error->error); seq_printf(m, "Blitter command stream:\n"); - seq_printf(m, " ACTHD: 0x%08x\n", error->bcs_acthd); + seq_printf(m, " ACTHD: 0x%08x\n", error->acthd[BCS]); seq_printf(m, " IPEIR: 0x%08x\n", error->bcs_ipeir); seq_printf(m, " IPEHR: 0x%08x\n", error->bcs_ipehr); seq_printf(m, " INSTDONE: 0x%08x\n", error->bcs_instdone); seq_printf(m, " seqno: 0x%08x\n", error->bcs_seqno); seq_printf(m, "Video (BSD) command stream:\n"); - seq_printf(m, " ACTHD: 0x%08x\n", error->vcs_acthd); + seq_printf(m, " ACTHD: 0x%08x\n", error->acthd[VCS]); seq_printf(m, " IPEIR: 0x%08x\n", error->vcs_ipeir); seq_printf(m, " IPEHR: 0x%08x\n", error->vcs_ipehr); seq_printf(m, " INSTDONE: 0x%08x\n", error->vcs_instdone); seq_printf(m, " seqno: 0x%08x\n", error->vcs_seqno); } seq_printf(m, "Render command stream:\n"); - seq_printf(m, " ACTHD: 0x%08x\n", error->acthd); + seq_printf(m, " ACTHD: 0x%08x\n", error->acthd[RCS]); seq_printf(m, " IPEIR: 0x%08x\n", error->ipeir); seq_printf(m, " IPEHR: 0x%08x\n", error->ipehr); seq_printf(m, " INSTDONE: 0x%08x\n", error->instdone); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 279560e..d4e8d42 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -152,15 +152,13 @@ struct drm_i915_error_state { u32 ipeir; u32 ipehr; u32 instdone; - u32 
acthd; + u32 acthd[I915_NUM_RINGS]; u32 page_fault[I915_NUM_RINGS]; u32 error; /* gen6+ */ - u32 bcs_acthd; /* gen6+ blt engine */ u32 bcs_ipehr; u32 bcs_ipeir; u32 bcs_instdone; u32 bcs_seqno; - u32 vcs_acthd; /* gen6+ bsd engine */ u32 vcs_ipehr; u32 vcs_ipeir; u32 vcs_instdone; @@ -332,9 +330,8 @@ typedef struct drm_i915_private { #define DRM_I915_HANGCHECK_PERIOD 1500 /* in ms */ struct timer_list hangcheck_timer; int hangcheck_count; - uint32_t last_acthd; - uint32_t last_instdone; - uint32_t last_instdone1; + uint32_t last_acthd[I915_NUM_RINGS]; + uint64_t last_instdone[I915_NUM_RINGS]; unsigned long cfb_size; unsigned int cfb_fb; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 990abda..7f228ec 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -919,7 +919,7 @@ static void i915_capture_error_state(struct drm_device *dev) error->page_fault[BCS] = I915_READ(GEN6_BLT_FAULT); error->error = I915_READ(ERROR_GEN6); - error->bcs_acthd = I915_READ(BCS_ACTHD); + error->acthd[BCS] = I915_READ(BCS_ACTHD); error->bcs_ipehr = I915_READ(BCS_IPEHR); error->bcs_ipeir = I915_READ(BCS_IPEIR); error->bcs_instdone = I915_READ(BCS_INSTDONE); @@ -927,7 +927,7 @@ static void i915_capture_error_state(struct drm_device *dev) if (dev_priv->ring[BCS].get_seqno) error->bcs_seqno = dev_priv->ring[BCS].get_seqno(&dev_priv->ring[BCS]); - error->vcs_acthd = I915_READ(VCS_ACTHD); + error->acthd[VCS] = I915_READ(VCS_ACTHD); error->vcs_ipehr = I915_READ(VCS_IPEHR); error->vcs_ipeir = I915_READ(VCS_IPEIR); error->vcs_instdone = I915_READ(VCS_INSTDONE); @@ -941,13 +941,13 @@ static void i915_capture_error_state(struct drm_device *dev) error->instdone = I915_READ(INSTDONE_I965); error->instps = I915_READ(INSTPS); error->instdone1 = I915_READ(INSTDONE1); - error->acthd = I915_READ(ACTHD_I965); + error->acthd[RCS] = I915_READ(ACTHD_I965); error->bbaddr = I915_READ64(BB_ADDR); } else { error->ipeir = I915_READ(IPEIR); 
error->ipehr = I915_READ(IPEHR); error->instdone = I915_READ(INSTDONE); - error->acthd = I915_READ(ACTHD); + error->acthd[RCS] = I915_READ(ACTHD); error->bbaddr = 0; } i915_gem_record_fences(dev, error); @@ -1659,6 +1659,83 @@ static bool kick_ring(struct intel_ring_buffer *ring) return false; } +static bool +instdone_stuck(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint64_t instdone = 0, instdone1 = 0, vcs_instdone = 0, bcs_instdone = 0; + bool stuck; + + switch (INTEL_INFO(dev)->gen) { + case 7: + case 6: + bcs_instdone = I915_READ(BCS_INSTDONE); + case 5: + vcs_instdone = I915_READ(VCS_INSTDONE); + case 4: + instdone = I915_READ(INSTDONE_I965); + instdone1 = I915_READ(INSTDONE1); + break; + case 3: + case 2: + instdone = I915_READ(INSTDONE); + break; + } + + stuck = + (dev_priv->last_instdone[RCS] == ((instdone << 32) | instdone1)) && + (dev_priv->last_instdone[VCS] == vcs_instdone) && + (dev_priv->last_instdone[BCS] == bcs_instdone); + + dev_priv->last_instdone[RCS] = (instdone << 32) | instdone1; + dev_priv->last_instdone[VCS] = vcs_instdone; + dev_priv->last_instdone[BCS] = bcs_instdone; + + return stuck; +} + +static bool +acthd_stuck(struct drm_device *dev) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + uint32_t acthd = 0, vcs_acthd = 0, bcs_acthd = 0; + bool stuck = false; + + switch (INTEL_INFO(dev)->gen) { + case 7: + case 6: + bcs_acthd = intel_ring_get_active_head(&dev_priv->ring[BCS]); + case 5: + vcs_acthd = intel_ring_get_active_head(&dev_priv->ring[VCS]); + case 4: + case 3: + case 2: + acthd = intel_ring_get_active_head(&dev_priv->ring[RCS]); + break; + } + + stuck = + (dev_priv->last_acthd[RCS] == acthd) && + (dev_priv->last_acthd[VCS] == vcs_acthd) && + (dev_priv->last_acthd[BCS] == bcs_acthd); + + dev_priv->last_acthd[RCS] = acthd; + dev_priv->last_acthd[VCS] = vcs_acthd; + dev_priv->last_acthd[BCS] = bcs_acthd; + + return stuck; +} + +static bool gpu_stuck(struct drm_device *dev) +{ + 
struct drm_i915_private *dev_priv = dev->dev_private; + + if (dev_priv->hangcheck_count++ == 0) + return false; + + return acthd_stuck(dev) || instdone_stuck(dev); +} + /** * This is called when the chip hasn't reported back with completed * batchbuffers in a long time. The first time this is called we simply record @@ -1669,7 +1746,6 @@ void i915_hangcheck_elapsed(unsigned long data) { struct drm_device *dev = (struct drm_device *)data; drm_i915_private_t *dev_priv = dev->dev_private; - uint32_t acthd, instdone, instdone1; bool err = false; if (!i915_enable_hangcheck) @@ -1685,50 +1761,30 @@ void i915_hangcheck_elapsed(unsigned long data) return; } - if (INTEL_INFO(dev)->gen < 4) { - acthd = I915_READ(ACTHD); - instdone = I915_READ(INSTDONE); - instdone1 = 0; - } else { - acthd = I915_READ(ACTHD_I965); - instdone = I915_READ(INSTDONE_I965); - instdone1 = I915_READ(INSTDONE1); - } - - if (dev_priv->last_acthd == acthd && - dev_priv->last_instdone == instdone && - dev_priv->last_instdone1 == instdone1) { - if (dev_priv->hangcheck_count++ > 1) { - DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); + if (gpu_stuck(dev)) { + DRM_ERROR("Hangcheck timer elapsed... GPU hung\n"); - if (!IS_GEN2(dev)) { - /* Is the chip hanging on a WAIT_FOR_EVENT? - * If so we can simply poke the RB_WAIT bit - * and break the hang. This should work on - * all but the second generation chipsets. - */ - - if (kick_ring(&dev_priv->ring[RCS])) - goto repeat; + if (!IS_GEN2(dev)) { + /* Is the chip hanging on a WAIT_FOR_EVENT? + * If so we can simply poke the RB_WAIT bit + * and break the hang. This should work on + * all but the second generation chipsets. 
+ */ - if (HAS_BSD(dev) && - kick_ring(&dev_priv->ring[VCS])) - goto repeat; + if (kick_ring(&dev_priv->ring[RCS])) + goto repeat; - if (HAS_BLT(dev) && - kick_ring(&dev_priv->ring[BCS])) - goto repeat; - } + if (HAS_BSD(dev) && + kick_ring(&dev_priv->ring[VCS])) + goto repeat; - i915_handle_error(dev, true); - return; + if (HAS_BLT(dev) && + kick_ring(&dev_priv->ring[BCS])) + goto repeat; } - } else { - dev_priv->hangcheck_count = 0; - dev_priv->last_acthd = acthd; - dev_priv->last_instdone = instdone; - dev_priv->last_instdone1 = instdone1; + i915_handle_error(dev, true); + return; } repeat: -- 1.7.6.4 _______________________________________________ Intel-gfx mailing list [email protected] http://lists.freedesktop.org/mailman/listinfo/intel-gfx
