Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-10 Thread Ville Syrjälä
On Tue, Jun 10, 2014 at 11:04:00AM +0100, Chris Wilson wrote:
 Long ago, back in the racy haydays of 915gm interrupt handling, page
 flips would occasionally go astray and leave the hardware stuck, and the
 display not updating. This annoyed people who relied on their systems
 being able to display continuously updating information 24/7, and so
 some code to detect when the driver missed the page flip completion
 signal was added. Until recently, it was presumed that the interrupt
 handling was now flawless, but once again Simon Farnsworth has found a
 system whose display will stall. Reinstate the pageflip stall detection,
 which works by checking to see if the hardware has been updated to the
 new framebuffer address following each vblank. If the hardware is
 scanning out from the new framebuffer, but we still think the flip is
 pending, then we kick our driver into submision.
 
 This is a continuation of the effort started with
 commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc
 Author: Simon Farnsworth simon.farnswo...@onelan.co.uk
 Date:   Wed Sep 1 17:47:52 2010 +0100
 
 drm/i915: Avoid pageflipping freeze when we miss the flip prepare 
 interrupt
 
 This now includes a belt-and-braces approach to make sure the driver
 (or the hardware) doesn't miss an interrupt and cause us to stop
 updating the display should the unthinkable happen and the pageflip fail - 
 i.e.
 that the user is able to continue submitting flips.
 
 v2: Cleanup, refactor, and rename
 
 Reported-by: Simon Farnsworth si...@farnz.org.uk
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Ville Syrjälä ville.syrj...@linux.intel.com
 ---
  drivers/gpu/drm/i915/i915_debugfs.c  |   23 +++---
  drivers/gpu/drm/i915/i915_irq.c  |   85 +++---
  drivers/gpu/drm/i915/intel_display.c |  130 
 --
  drivers/gpu/drm/i915/intel_drv.h |2 +
  4 files changed, 147 insertions(+), 93 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index e8ea1ef..a5f0a26 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -581,6 +581,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
  {
   struct drm_info_node *node = m-private;
   struct drm_device *dev = node-minor-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
   unsigned long flags;
   struct intel_crtc *crtc;
  
 @@ -595,6 +596,8 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
   seq_printf(m, No flip due on pipe %c (plane %c)\n,
  pipe, plane);
   } else {
 + u32 addr;
 +
   if (atomic_read(work-pending)  INTEL_FLIP_COMPLETE) {
   seq_printf(m, Flip queued on pipe %c (plane 
 %c)\n,
  pipe, plane);
 @@ -602,23 +605,23 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
   seq_printf(m, Flip pending (waiting for vsync) 
 on pipe %c (plane %c)\n,
  pipe, plane);
   }
 + seq_printf(m, Flip queued on frame %d, now %d\n,
 +work-sbc, 
 atomic_read(dev-vblank[crtc-pipe].count));
   if (work-enable_stall_check)
   seq_puts(m, Stall check enabled, );
   else
   seq_puts(m, Stall check waiting for page flip 
 ioctl, );
   seq_printf(m, %d prepares\n, 
 atomic_read(work-pending));
  
 - if (work-old_fb_obj) {
 - struct drm_i915_gem_object *obj = 
 work-old_fb_obj;
 - if (obj)
 - seq_printf(m, Old framebuffer 
 gtt_offset 0x%08lx\n,
 -
 i915_gem_obj_ggtt_offset(obj));
 - }
 + if (INTEL_INFO(dev)-gen = 4)
 + addr = 
 I915_HI_DISPBASE(I915_READ(DSPSURF(crtc-plane)));
 + else
 + addr = I915_READ(DSPADDR(crtc-plane));
 + seq_printf(m, Current scanout address 0x%08x\n, addr);
 +
   if (work-pending_flip_obj) {
 - struct drm_i915_gem_object *obj = 
 work-pending_flip_obj;
 - if (obj)
 - seq_printf(m, New framebuffer 
 gtt_offset 0x%08lx\n,
 -
 i915_gem_obj_ggtt_offset(obj));
 + seq_printf(m, New framebuffer address 
 0x%08lx\n, (long)work-gtt_offset);
 +

Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-10 Thread Chris Wilson
On Tue, Jun 10, 2014 at 02:25:50PM +0300, Ville Syrjälä wrote:
 On Tue, Jun 10, 2014 at 11:04:00AM +0100, Chris Wilson wrote:
  +static inline int crtc_sbc(struct intel_crtc *crtc)
  +{
  +   return atomic_read(crtc-base.dev-vblank[crtc-pipe].count);
  +}
 
 Still says 'sbc' which doesn't make sense to me.

I just don't like the term msc. :-p
crtc_vblank_counter()?
 
  +
   static inline void intel_mark_page_flip_active(struct intel_crtc 
  *intel_crtc)
   {
  /* Ensure that the work item is consistent when activating it ... */
  smp_wmb();
  atomic_set(intel_crtc-unpin_work-pending, INTEL_FLIP_PENDING);
  +   intel_crtc-unpin_work-sbc = crtc_sbc(intel_crtc);
  /* and that it is marked active as soon as the irq could fire. */
  smp_wmb();
   }
  @@ -9265,6 +9279,69 @@ static int intel_default_queue_flip(struct 
  drm_device *dev,
  return -ENODEV;
   }
   
  +static bool i915_gem_object_is_idle(struct drm_i915_gem_object *obj,
  +   bool readonly)
  +{
  +   struct intel_engine_cs *ring = obj-ring;
  +   u32 seqno = readonly ? obj-last_write_seqno : obj-last_read_seqno;
  +
  +   if (ring == NULL || seqno == 0)
  +   return true;
  +
  +   return i915_seqno_passed(ring-get_seqno(ring, true), seqno);
  +}
  +
  +static bool __intel_pageflip_stall_check(struct drm_device *dev,
  +struct drm_crtc *crtc)
  +{
  +   struct drm_i915_private *dev_priv = dev-dev_private;
  +   struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
  +   struct intel_unpin_work *work = intel_crtc-unpin_work;
  +   u32 addr;
  +
  +   if (atomic_read(work-pending) = INTEL_FLIP_COMPLETE)
  +   return true;
  +
  +   if (!work-enable_stall_check)
  +   return false;
  +
  +   if ((crtc_sbc(intel_crtc) - intel_crtc-unpin_work-sbc)  3)
  +   return false;
  +
  +   if (!i915_gem_object_is_idle(work-pending_flip_obj, true))
  +   return false;
 
 I take it this is done to mitigate my premature flip completion
 concerns? Should work for the common case I suppose. Although if
 someone does something like this write,read(s),flip it could
 still complete the flip too soon. Waiting for last_read_seqno would
 avoid that.

It actually predated your concerns. I considered the person who continues
to write to the pending fb and decided that I didn't care too much for
them. What I actually wanted to do was to capture the vblank counter for
when the object was idle and then start watching for a missed flip.
Indeed, tracking when we believe the flip was queued would be better
again.
 
 And double checking the hardware flip counter should avoid this
 problem entirely. We already have it sampled in unpin_work-flip_count
 for the mmio vs. cs flip race so it should be easy to check it here as
 well. I suppose having both the flip counter and seqno checks should
 provide the best protection for all gens.

That was half the reason for waiting for that series to land first. Just
I never adapted to the framecounter code.
 
 As far as the seqno check goes, I was wondering if we should sample
 the seqno when submitting the flip? I'm slightly worried how this will
 work if userspace submitted a flip and already managed to pile more
 rendering on top. This code would then wait for the seqno for those
 later rendering operations.

Right that is how mmio flips avoid this issue.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-10 Thread Ville Syrjälä
On Tue, Jun 10, 2014 at 12:33:48PM +0100, Chris Wilson wrote:
 On Tue, Jun 10, 2014 at 02:25:50PM +0300, Ville Syrjälä wrote:
  On Tue, Jun 10, 2014 at 11:04:00AM +0100, Chris Wilson wrote:
   +static inline int crtc_sbc(struct intel_crtc *crtc)
   +{
   + return atomic_read(crtc-base.dev-vblank[crtc-pipe].count);
   +}
  
  Still says 'sbc' which doesn't make sense to me.
 
 I just don't like the term msc. :-p
 crtc_vblank_counter()?

Maybe stick it into drmP.h and call it drm_crtc_vblank_counter() or
something? Hopefully people won't confuse it with the hardware counter.

-- 
Ville Syrjälä
Intel OTC
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-10 Thread Daniel Vetter
On Tue, Jun 10, 2014 at 02:47:29PM +0300, Ville Syrjälä wrote:
 On Tue, Jun 10, 2014 at 12:33:48PM +0100, Chris Wilson wrote:
  On Tue, Jun 10, 2014 at 02:25:50PM +0300, Ville Syrjälä wrote:
   On Tue, Jun 10, 2014 at 11:04:00AM +0100, Chris Wilson wrote:
+static inline int crtc_sbc(struct intel_crtc *crtc)
+{
+   return atomic_read(crtc-base.dev-vblank[crtc-pipe].count);
+}
   
   Still says 'sbc' which doesn't make sense to me.
  
  I just don't like the term msc. :-p
  crtc_vblank_counter()?
 
 Maybe stick it into drmP.h and call it drm_crtc_vblank_counter() or
 something? Hopefully people won't confuse it with the hardware counter.

It's called drm_vblank_count, but a new wrapper which takes a drm_crtc *
as the argument would indeed be nice. So drm_crtc_vblank_counter(struct
drm_crtc *crtc). And please don't forget to add the kerneldoc and to pull
it into the drm docbook.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-10 Thread Ville Syrjälä
On Tue, Jun 10, 2014 at 01:46:54PM +0100, Chris Wilson wrote:
 Long ago, back in the racy haydays of 915gm interrupt handling, page
 flips would occasionally go astray and leave the hardware stuck, and the
 display not updating. This annoyed people who relied on their systems
 being able to display continuously updating information 24/7, and so
 some code to detect when the driver missed the page flip completion
 signal was added. Until recently, it was presumed that the interrupt
 handling was now flawless, but once again Simon Farnsworth has found a
 system whose display will stall. Reinstate the pageflip stall detection,
 which works by checking to see if the hardware has been updated to the
 new framebuffer address following each vblank. If the hardware is
 scanning out from the new framebuffer, but we still think the flip is
 pending, then we kick our driver into submision.
 
 This is a continuation of the effort started with
 commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc
 Author: Simon Farnsworth simon.farnswo...@onelan.co.uk
 Date:   Wed Sep 1 17:47:52 2010 +0100
 
 drm/i915: Avoid pageflipping freeze when we miss the flip prepare 
 interrupt
 
 This now includes a belt-and-braces approach to make sure the driver
 (or the hardware) doesn't miss an interrupt and cause us to stop
 updating the display should the unthinkable happen and the pageflip fail - 
 i.e.
 that the user is able to continue submitting flips.
 
 v2: Cleanup, refactor, and rename
 v3: Only start counting vblanks after the flip command has been seen by
 the hardware.
 
 Reported-by: Simon Farnsworth si...@farnz.org.uk
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 Cc: Daniel Vetter daniel.vet...@ffwll.ch
 Cc: Ville Syrjälä ville.syrj...@linux.intel.com

Reviewed-by: Ville Syrjälä ville.syrj...@linux.intel.com

Someone may want to add the flip counter check later. At least there's a
comment about danger of false positives.

 ---
  drivers/gpu/drm/i915/i915_debugfs.c  |  29 ++---
  drivers/gpu/drm/i915/i915_irq.c  |  85 
  drivers/gpu/drm/i915/intel_display.c | 121 
 ---
  drivers/gpu/drm/i915/intel_drv.h |   5 ++
  4 files changed, 147 insertions(+), 93 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index e8ea1efd3810..eae1a184 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -581,6 +581,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
  {
   struct drm_info_node *node = m-private;
   struct drm_device *dev = node-minor-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
   unsigned long flags;
   struct intel_crtc *crtc;
  
 @@ -595,6 +596,8 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
   seq_printf(m, No flip due on pipe %c (plane %c)\n,
  pipe, plane);
   } else {
 + u32 addr;
 +
   if (atomic_read(work-pending)  INTEL_FLIP_COMPLETE) {
   seq_printf(m, Flip queued on pipe %c (plane 
 %c)\n,
  pipe, plane);
 @@ -602,23 +605,29 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
   seq_printf(m, Flip pending (waiting for vsync) 
 on pipe %c (plane %c)\n,
  pipe, plane);
   }
 + seq_printf(m, Flip queued on %s at seqno %u, now %u\n,
 +work-ring-name,
 +work-flip_queued_seqno,
 +work-ring-get_seqno(work-ring, true));
 + seq_printf(m, Flip queued on frame %d, (was ready on 
 frame %d), now %d\n,
 +work-flip_queued_vblank,
 +work-flip_ready_vblank,
 +drm_vblank_count(dev, crtc-pipe));
   if (work-enable_stall_check)
   seq_puts(m, Stall check enabled, );
   else
   seq_puts(m, Stall check waiting for page flip 
 ioctl, );
   seq_printf(m, %d prepares\n, 
 atomic_read(work-pending));
  
 - if (work-old_fb_obj) {
 - struct drm_i915_gem_object *obj = 
 work-old_fb_obj;
 - if (obj)
 - seq_printf(m, Old framebuffer 
 gtt_offset 0x%08lx\n,
 -
 i915_gem_obj_ggtt_offset(obj));
 - }
 + if (INTEL_INFO(dev)-gen = 4)
 + addr = 
 I915_HI_DISPBASE(I915_READ(DSPSURF(crtc-plane)));

Re: [Intel-gfx] [PATCH 1/3] drm/i915: Check for a stalled page flip after each vblank

2014-06-09 Thread Ville Syrjälä
On Fri, May 30, 2014 at 05:16:34PM +0100, Chris Wilson wrote:
 Long ago, back in the racy haydays of 915gm interrupt handling, page
 flips would occasionally go astray and leave the hardware stuck, and the
 display not updating. This annoyed people who relied on their systems
 being able to display continuously updating information 24/7, and so
 some code to detect when the driver missed the page flip completion
 signal was added. Until recently, it was presumed that the interrupt
 handling was now flawless, but once again Simon Farnsworth has found a
 system whose display will stall. Reinstate the pageflip stall detection,
 which works by checking to see if the hardware has been updated to the
 new framebuffer address following each vblank. If the hardware is
 scanning out from the new framebuffer, but we still think the flip is
 pending, then we kick our driver into submision.
 
 This is a continuation of the effort started with
 commit 4e5359cd053bfb7d8dabe4a63624a5726848ffbc
 Author: Simon Farnsworth simon.farnswo...@onelan.co.uk
 Date:   Wed Sep 1 17:47:52 2010 +0100
 
 drm/i915: Avoid pageflipping freeze when we miss the flip prepare 
 interrupt
 
 This now includes a belt-and-braces approach to make sure the driver
 (or the hardware) doesn't miss an interrupt and cause us to stop
 updating the display should the unthinkable happen and the pageflip fail - 
 i.e.
 that the user is able to continue submitting flips.
 
 Reported-by: Simon Farnsworth si...@farnz.org.uk
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75502
 Signed-off-by: Chris Wilson ch...@chris-wilson.co.uk
 ---
  drivers/gpu/drm/i915/i915_debugfs.c  |  33 ---
  drivers/gpu/drm/i915/i915_irq.c  |  85 ---
  drivers/gpu/drm/i915/intel_display.c | 109 
 ++-
  drivers/gpu/drm/i915/intel_drv.h |   2 +
  4 files changed, 144 insertions(+), 85 deletions(-)
 
 diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
 b/drivers/gpu/drm/i915/i915_debugfs.c
 index 0b063c03d188..a05a33ab4b33 100644
 --- a/drivers/gpu/drm/i915/i915_debugfs.c
 +++ b/drivers/gpu/drm/i915/i915_debugfs.c
 @@ -581,6 +581,7 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
  {
   struct drm_info_node *node = m-private;
   struct drm_device *dev = node-minor-dev;
 + struct drm_i915_private *dev_priv = dev-dev_private;
   unsigned long flags;
   struct intel_crtc *crtc;
  
 @@ -602,23 +603,37 @@ static int i915_gem_pageflip_info(struct seq_file *m, 
 void *data)
   seq_printf(m, Flip pending (waiting for vsync) 
 on pipe %c (plane %c)\n,
  pipe, plane);
   }
 + seq_printf(m, Flip queued on frame %d, now %d\n,
 +work-sbc, 
 atomic_read(dev-vblank[crtc-pipe].count));
   if (work-enable_stall_check)
   seq_puts(m, Stall check enabled, );
   else
   seq_puts(m, Stall check waiting for page flip 
 ioctl, );
   seq_printf(m, %d prepares\n, 
 atomic_read(work-pending));
  
 - if (work-old_fb_obj) {
 - struct drm_i915_gem_object *obj = 
 work-old_fb_obj;
 - if (obj)
 - seq_printf(m, Old framebuffer 
 gtt_offset 0x%08lx\n,
 -
 i915_gem_obj_ggtt_offset(obj));
 + {
 + u32 addr;
 +
 + if (INTEL_INFO(dev)-gen = 4)
 + addr = DSPSURF(crtc-plane);
 + else
 + addr = DSPADDR(crtc-plane);
 +
 + seq_printf(m, Current scanout address 
 0x%08x\n, 
 +I915_READ(addr));

current makes me think of the live address. But right now I can't think of a
better term to use there.

   }

I don't like the extra {} block. Such things always give me an
impression that this is a debug hack someone forgot to remove.

 +
   if (work-pending_flip_obj) {
 - struct drm_i915_gem_object *obj = 
 work-pending_flip_obj;
 - if (obj)
 - seq_printf(m, New framebuffer 
 gtt_offset 0x%08lx\n,
 -
 i915_gem_obj_ggtt_offset(obj));
 + bool complete;
 +
 + seq_printf(m, New framebuffer address 
 0x%08lx\n, (long)work-gtt_offset);
 +
 + if (INTEL_INFO(dev)-gen = 4)
 + complete = 
 I915_HI_DISPBASE(I915_READ(DSPSURF(crtc-plane))) == work-gtt_offset;
 +