Re: [RFC 1/2] drm/vc4: Handle async page flips in the atomic_commit() path

2018-03-30 Thread Boris Brezillon
On Fri, 30 Mar 2018 17:09:03 +0200
Boris Brezillon  wrote:

> Rework the atomic commit logic to handle async page flip requests in
> the atomic update path.
> 
> Async page flips are a bit more complicated than async cursor updates
> (already handled in the vc4_atomic_commit() function) in that you need
> to wait for fences on the new framebuffers to be signaled before doing
> the flip. In order to ensure that, we schedule a commit_work work to do
> the async update (note that the existing implementation already uses a
> work to wait for fences to be signaled, so, the latency shouldn't
> change that much).
> 
> Of course, we have to modify a bit the atomic_complete_commit()
> implementation to avoid waiting for things we don't care about when
> doing an async page flip:
> 
> * drm_atomic_helper_wait_for_dependencies() waits for flip_done which
>   we don't care about when doing async page flips. Instead we replace
>   that call by a loop that waits for hw_done only
> * drm_atomic_helper_commit_modeset_disables() and
>   drm_atomic_helper_commit_modeset_enables() are not needed because
>   nothing except the planes' FBs are updated in async page flips
> * drm_atomic_helper_commit_planes() is replaced by
>   vc4_plane_async_set_fb() which is doing only the FB update and is
>   thus much simpler
> * drm_atomic_helper_wait_for_vblanks() is not needed because we don't
>   wait for the next VBLANK period to apply the new state, and flip
>   events are signaled manually just after the HW has been updated
> 
> Thanks to this rework, we can get rid of the custom vc4_page_flip()
> implementation and its dependencies and use
> drm_atomic_helper_page_flip() instead.

Another good reason for moving async page flip to the atomic commit
path is that is fixes a bug we had:
drm_atomic_helper_prepare/cleanup_planes() were not called in the async
page flip path, which can lead to unbalanced vc4_inc/dec_usecnt()
in some situations (when the plane is updated once with an async page
flip and then with a regular update) or trigger a use after free if the
BO passed to the plane is marked purgeable and the kernel decides to
purge its cache.

> 
> Signed-off-by: Boris Brezillon 
> ---
>  drivers/gpu/drm/vc4/vc4_crtc.c | 103 
> +
>  drivers/gpu/drm/vc4/vc4_kms.c  | 101 +---
>  2 files changed, 86 insertions(+), 118 deletions(-)
> 
> diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
> index bf4667481935..3843c39dce61 100644
> --- a/drivers/gpu/drm/vc4/vc4_crtc.c
> +++ b/drivers/gpu/drm/vc4/vc4_crtc.c
> @@ -757,107 +757,6 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void 
> *data)
>   return ret;
>  }
>  
> -struct vc4_async_flip_state {
> - struct drm_crtc *crtc;
> - struct drm_framebuffer *fb;
> - struct drm_pending_vblank_event *event;
> -
> - struct vc4_seqno_cb cb;
> -};
> -
> -/* Called when the V3D execution for the BO being flipped to is done, so that
> - * we can actually update the plane's address to point to it.
> - */
> -static void
> -vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
> -{
> - struct vc4_async_flip_state *flip_state =
> - container_of(cb, struct vc4_async_flip_state, cb);
> - struct drm_crtc *crtc = flip_state->crtc;
> - struct drm_device *dev = crtc->dev;
> - struct vc4_dev *vc4 = to_vc4_dev(dev);
> - struct drm_plane *plane = crtc->primary;
> -
> - vc4_plane_async_set_fb(plane, flip_state->fb);
> - if (flip_state->event) {
> - unsigned long flags;
> -
> - spin_lock_irqsave(>event_lock, flags);
> - drm_crtc_send_vblank_event(crtc, flip_state->event);
> - spin_unlock_irqrestore(>event_lock, flags);
> - }
> -
> - drm_crtc_vblank_put(crtc);
> - drm_framebuffer_put(flip_state->fb);
> - kfree(flip_state);
> -
> - up(>async_modeset);
> -}
> -
> -/* Implements async (non-vblank-synced) page flips.
> - *
> - * The page flip ioctl needs to return immediately, so we grab the
> - * modeset semaphore on the pipe, and queue the address update for
> - * when V3D is done with the BO being flipped to.
> - */
> -static int vc4_async_page_flip(struct drm_crtc *crtc,
> -struct drm_framebuffer *fb,
> -struct drm_pending_vblank_event *event,
> -uint32_t flags)
> -{
> - struct drm_device *dev = crtc->dev;
> - struct vc4_dev *vc4 = to_vc4_dev(dev);
> - struct drm_plane *plane = crtc->primary;
> - int ret = 0;
> - struct vc4_async_flip_state *flip_state;
> - struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
> - struct vc4_bo *bo = to_vc4_bo(_bo->base);
> -
> - flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
> - if (!flip_state)
> - return -ENOMEM;
> -
> - 

[RFC 1/2] drm/vc4: Handle async page flips in the atomic_commit() path

2018-03-30 Thread Boris Brezillon
Rework the atomic commit logic to handle async page flip requests in
the atomic update path.

Async page flips are a bit more complicated than async cursor updates
(already handled in the vc4_atomic_commit() function) in that you need
to wait for fences on the new framebuffers to be signaled before doing
the flip. In order to ensure that, we schedule a commit_work work to do
the async update (note that the existing implementation already uses a
work to wait for fences to be signaled, so, the latency shouldn't
change that much).

Of course, we have to modify a bit the atomic_complete_commit()
implementation to avoid waiting for things we don't care about when
doing an async page flip:

* drm_atomic_helper_wait_for_dependencies() waits for flip_done which
  we don't care about when doing async page flips. Instead we replace
  that call by a loop that waits for hw_done only
* drm_atomic_helper_commit_modeset_disables() and
  drm_atomic_helper_commit_modeset_enables() are not needed because
  nothing except the planes' FBs are updated in async page flips
* drm_atomic_helper_commit_planes() is replaced by
  vc4_plane_async_set_fb() which is doing only the FB update and is
  thus much simpler
* drm_atomic_helper_wait_for_vblanks() is not needed because we don't
  wait for the next VBLANK period to apply the new state, and flip
  events are signaled manually just after the HW has been updated

Thanks to this rework, we can get rid of the custom vc4_page_flip()
implementation and its dependencies and use
drm_atomic_helper_page_flip() instead.

Signed-off-by: Boris Brezillon 
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 103 +
 drivers/gpu/drm/vc4/vc4_kms.c  | 101 +---
 2 files changed, 86 insertions(+), 118 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index bf4667481935..3843c39dce61 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -757,107 +757,6 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void 
*data)
return ret;
 }
 
-struct vc4_async_flip_state {
-   struct drm_crtc *crtc;
-   struct drm_framebuffer *fb;
-   struct drm_pending_vblank_event *event;
-
-   struct vc4_seqno_cb cb;
-};
-
-/* Called when the V3D execution for the BO being flipped to is done, so that
- * we can actually update the plane's address to point to it.
- */
-static void
-vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
-{
-   struct vc4_async_flip_state *flip_state =
-   container_of(cb, struct vc4_async_flip_state, cb);
-   struct drm_crtc *crtc = flip_state->crtc;
-   struct drm_device *dev = crtc->dev;
-   struct vc4_dev *vc4 = to_vc4_dev(dev);
-   struct drm_plane *plane = crtc->primary;
-
-   vc4_plane_async_set_fb(plane, flip_state->fb);
-   if (flip_state->event) {
-   unsigned long flags;
-
-   spin_lock_irqsave(>event_lock, flags);
-   drm_crtc_send_vblank_event(crtc, flip_state->event);
-   spin_unlock_irqrestore(>event_lock, flags);
-   }
-
-   drm_crtc_vblank_put(crtc);
-   drm_framebuffer_put(flip_state->fb);
-   kfree(flip_state);
-
-   up(>async_modeset);
-}
-
-/* Implements async (non-vblank-synced) page flips.
- *
- * The page flip ioctl needs to return immediately, so we grab the
- * modeset semaphore on the pipe, and queue the address update for
- * when V3D is done with the BO being flipped to.
- */
-static int vc4_async_page_flip(struct drm_crtc *crtc,
-  struct drm_framebuffer *fb,
-  struct drm_pending_vblank_event *event,
-  uint32_t flags)
-{
-   struct drm_device *dev = crtc->dev;
-   struct vc4_dev *vc4 = to_vc4_dev(dev);
-   struct drm_plane *plane = crtc->primary;
-   int ret = 0;
-   struct vc4_async_flip_state *flip_state;
-   struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
-   struct vc4_bo *bo = to_vc4_bo(_bo->base);
-
-   flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
-   if (!flip_state)
-   return -ENOMEM;
-
-   drm_framebuffer_get(fb);
-   flip_state->fb = fb;
-   flip_state->crtc = crtc;
-   flip_state->event = event;
-
-   /* Make sure all other async modesetes have landed. */
-   ret = down_interruptible(>async_modeset);
-   if (ret) {
-   drm_framebuffer_put(fb);
-   kfree(flip_state);
-   return ret;
-   }
-
-   WARN_ON(drm_crtc_vblank_get(crtc) != 0);
-
-   /* Immediately update the plane's legacy fb pointer, so that later
-* modeset prep sees the state that will be present when the semaphore
-* is released.
-*/
-   drm_atomic_set_fb_for_plane(plane->state, fb);
-   plane->fb = fb;
-
-   vc4_queue_seqno_cb(dev,