On 12/5/18 4:01 PM, Kazlauskas, Nicholas wrote:
> On 2018-12-05 3:44 p.m., Grodzovsky, Andrey wrote:
>>
>>
>> On 12/05/2018 03:42 PM, Kazlauskas, Nicholas wrote:
>>> On 2018-12-05 3:26 p.m., Grodzovsky, Andrey wrote:
>>>>
>>>> On 12/05/2018 02:59 PM, Nicholas Kazlauskas wrote:
>>>>> [Why]
>>>>> Legacy cursor plane updates from drm helpers go through the full
>>>>> atomic codepath. A high volume of cursor updates through this slow
>>>>> code path can cause subsequent page-flips to skip vblank intervals
>>>>> since each individual update is slow.
>>>>>
>>>>> This problem is particularly noticeable for the compton compositor.
>>>>>
>>>>> [How]
>>>>> A fast path for cursor plane updates is added by using DRM asynchronous
>>>>> commit support provided by async_check and async_update. These don't do
>>>>> a full state/flip_done dependency stall and they don't block other
>>>>> commit work.
>>>>>
>>>>> However, DC still expects itself to be single-threaded for anything
>>>>> that can issue register writes. Screen corruption or hangs can occur
>>>>> if write sequences overlap. Every call that potentially performs
>>>>> register writes needs to be guarded for asynchronous updates to work.
>>>>> The dc_lock mutex was added for this.
>>>> As far as page-flip-related register writes are concerned, I think this
>>>> was never an issue - we always
>>>> supported fully concurrent page flips on different CRTCs meaning writing
>>>> surface address concurrently
>>>> on different pipes. So are you sure you can't do cursor updates
>>>> concurrently against at least page flips ?
>>>> I am not sure about full updates.
>>>>
>>>> Andrey
>>> I think this was true before we started locking the pipes around cursor
>>> updates (and probably only for dce). The problem that occurs here is
>>> writing to the lock register from multiple threads at the same time.
>>>
>>> In general I think the "contract" between dm/dc now is that anything
>>> from dc that does register write sequences can't be called from multiple
>>> threads.
>>>
>>> Nicholas Kazlauskas
>>
>> Ok, do note that you also serialize all the page flips now which looks
>> unneeded - maybe consider r/w lock to at least avoid that.
>>
>> Andrey
> 
> Yeah, they definitely shouldn't be happening at the same time as the
> cursor calls.
> 
> I'd need to look into whether it's okay now to do concurrent writes from
> the stream update functions though, but I'd imagine it's not something
> we'd want to be doing. A r/w lock would work if we could though.
> 
> Nicholas Kazlauskas

A reader/writer lock wouldn't help here, actually.

Fast updates (and page flips in particular) need to lock the pipe as 
well, see: commit_planes_for_stream.

Nicholas Kazlauskas

> 
>>
>>>
>>>>> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106175
>>>>>
>>>>> Cc: Leo Li <sunpeng...@amd.com>
>>>>> Cc: Harry Wentland <harry.wentl...@amd.com>
>>>>> Signed-off-by: Nicholas Kazlauskas <nicholas.kazlaus...@amd.com>
>>>>> ---
>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 
>>>>> ++++++++++++++++++-
>>>>>       .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  8 +++
>>>>>       2 files changed, 73 insertions(+), 2 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
>>>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> index 23d61570df17..4df14a50a8ac 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
>>>>> @@ -57,6 +57,7 @@
>>>>>       
>>>>>       #include <drm/drmP.h>
>>>>>       #include <drm/drm_atomic.h>
>>>>> +#include <drm/drm_atomic_uapi.h>
>>>>>       #include <drm/drm_atomic_helper.h>
>>>>>       #include <drm/drm_dp_mst_helper.h>
>>>>>       #include <drm/drm_fb_helper.h>
>>>>> @@ -133,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct 
>>>>> drm_atomic_state *state);
>>>>>       static int amdgpu_dm_atomic_check(struct drm_device *dev,
>>>>>                                     struct drm_atomic_state *state);
>>>>>       
>>>>> +static void handle_cursor_update(struct drm_plane *plane,
>>>>> +                          struct drm_plane_state *old_plane_state);
>>>>>       
>>>>>       
>>>>>       
>>>>> @@ -402,6 +405,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
>>>>>           /* Zero all the fields */
>>>>>           memset(&init_data, 0, sizeof(init_data));
>>>>>       
>>>>> + mutex_init(&adev->dm.dc_lock);
>>>>> +
>>>>>           if(amdgpu_dm_irq_init(adev)) {
>>>>>                   DRM_ERROR("amdgpu: failed to initialize DM IRQ 
>>>>> support.\n");
>>>>>                   goto error;
>>>>> @@ -516,6 +521,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
>>>>>           /* DC Destroy TODO: Replace destroy DAL */
>>>>>           if (adev->dm.dc)
>>>>>                   dc_destroy(&adev->dm.dc);
>>>>> +
>>>>> + mutex_destroy(&adev->dm.dc_lock);
>>>>> +
>>>>>           return;
>>>>>       }
>>>>>       
>>>>> @@ -3615,10 +3623,43 @@ static int dm_plane_atomic_check(struct drm_plane 
>>>>> *plane,
>>>>>           return -EINVAL;
>>>>>       }
>>>>>       
>>>>> +static int dm_plane_atomic_async_check(struct drm_plane *plane,
>>>>> +                                struct drm_plane_state *new_plane_state)
>>>>> +{
>>>>> + /* Only support async updates on cursor planes. */
>>>>> + if (plane->type != DRM_PLANE_TYPE_CURSOR)
>>>>> +         return -EINVAL;
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static void dm_plane_atomic_async_update(struct drm_plane *plane,
>>>>> +                                  struct drm_plane_state *new_state)
>>>>> +{
>>>>> + struct drm_plane_state *old_state =
>>>>> +         drm_atomic_get_old_plane_state(new_state->state, plane);
>>>>> +
>>>>> + if (plane->state->fb != new_state->fb)
>>>>> +         drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
>>>>> +
>>>>> + plane->state->src_x = new_state->src_x;
>>>>> + plane->state->src_y = new_state->src_y;
>>>>> + plane->state->src_w = new_state->src_w;
>>>>> + plane->state->src_h = new_state->src_h;
>>>>> + plane->state->crtc_x = new_state->crtc_x;
>>>>> + plane->state->crtc_y = new_state->crtc_y;
>>>>> + plane->state->crtc_w = new_state->crtc_w;
>>>>> + plane->state->crtc_h = new_state->crtc_h;
>>>>> +
>>>>> + handle_cursor_update(plane, old_state);
>>>>> +}
>>>>> +
>>>>>       static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
>>>>>           .prepare_fb = dm_plane_helper_prepare_fb,
>>>>>           .cleanup_fb = dm_plane_helper_cleanup_fb,
>>>>>           .atomic_check = dm_plane_atomic_check,
>>>>> + .atomic_async_check = dm_plane_atomic_async_check,
>>>>> + .atomic_async_update = dm_plane_atomic_async_update
>>>>>       };
>>>>>       
>>>>>       /*
>>>>> @@ -4307,6 +4348,7 @@ static int get_cursor_position(struct drm_plane 
>>>>> *plane, struct drm_crtc *crtc,
>>>>>       static void handle_cursor_update(struct drm_plane *plane,
>>>>>                                    struct drm_plane_state 
>>>>> *old_plane_state)
>>>>>       {
>>>>> + struct amdgpu_device *adev = plane->dev->dev_private;
>>>>>           struct amdgpu_framebuffer *afb = 
>>>>> to_amdgpu_framebuffer(plane->state->fb);
>>>>>           struct drm_crtc *crtc = afb ? plane->state->crtc : 
>>>>> old_plane_state->crtc;
>>>>>           struct dm_crtc_state *crtc_state = crtc ? 
>>>>> to_dm_crtc_state(crtc->state) : NULL;
>>>>> @@ -4331,9 +4373,12 @@ static void handle_cursor_update(struct drm_plane 
>>>>> *plane,
>>>>>       
>>>>>           if (!position.enable) {
>>>>>                   /* turn off cursor */
>>>>> -         if (crtc_state && crtc_state->stream)
>>>>> +         if (crtc_state && crtc_state->stream) {
>>>>> +                 mutex_lock(&adev->dm.dc_lock);
>>>>>                           
>>>>> dc_stream_set_cursor_position(crtc_state->stream,
>>>>>                                                         &position);
>>>>> +                 mutex_unlock(&adev->dm.dc_lock);
>>>>> +         }
>>>>>                   return;
>>>>>           }
>>>>>       
>>>>> @@ -4351,6 +4396,7 @@ static void handle_cursor_update(struct drm_plane 
>>>>> *plane,
>>>>>           attributes.pitch = attributes.width;
>>>>>       
>>>>>           if (crtc_state->stream) {
>>>>> +         mutex_lock(&adev->dm.dc_lock);
>>>>>                   if (!dc_stream_set_cursor_attributes(crtc_state->stream,
>>>>>                                                            &attributes))
>>>>>                           DRM_ERROR("DC failed to set cursor 
>>>>> attributes\n");
>>>>> @@ -4358,6 +4404,7 @@ static void handle_cursor_update(struct drm_plane 
>>>>> *plane,
>>>>>                   if (!dc_stream_set_cursor_position(crtc_state->stream,
>>>>>                                                      &position))
>>>>>                           DRM_ERROR("DC failed to set cursor position\n");
>>>>> +         mutex_unlock(&adev->dm.dc_lock);
>>>>>           }
>>>>>       }
>>>>>       
>>>>> @@ -4573,6 +4620,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>                                   &acrtc_state->stream->vrr_infopacket;
>>>>>           }
>>>>>       
>>>>> + mutex_lock(&adev->dm.dc_lock);
>>>>>           dc_commit_updates_for_stream(adev->dm.dc,
>>>>>                                                surface_updates,
>>>>>                                                1,
>>>>> @@ -4580,6 +4628,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>                                                &stream_update,
>>>>>                                                &surface_updates->surface,
>>>>>                                                state);
>>>>> + mutex_unlock(&adev->dm.dc_lock);
>>>>>       
>>>>>           DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
>>>>>                            __func__,
>>>>> @@ -4594,6 +4643,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
>>>>>        * with a dc_plane_state and follow the atomic model a bit more 
>>>>> closely here.
>>>>>        */
>>>>>       static bool commit_planes_to_stream(
>>>>> +         struct amdgpu_display_manager *dm,
>>>>>                   struct dc *dc,
>>>>>                   struct dc_plane_state **plane_states,
>>>>>                   uint8_t new_plane_count,
>>>>> @@ -4670,11 +4720,13 @@ static bool commit_planes_to_stream(
>>>>>                   updates[i].scaling_info = &scaling_info[i];
>>>>>           }
>>>>>       
>>>>> + mutex_lock(&dm->dc_lock);
>>>>>           dc_commit_updates_for_stream(
>>>>>                           dc,
>>>>>                           updates,
>>>>>                           new_plane_count,
>>>>>                           dc_stream, stream_update, plane_states, state);
>>>>> + mutex_unlock(&dm->dc_lock);
>>>>>       
>>>>>           kfree(flip_addr);
>>>>>           kfree(plane_info);
>>>>> @@ -4780,7 +4832,8 @@ static void amdgpu_dm_commit_planes(struct 
>>>>> drm_atomic_state *state,
>>>>>       
>>>>>                   dc_stream_attach->abm_level = acrtc_state->abm_level;
>>>>>       
>>>>> -         if (false == commit_planes_to_stream(dm->dc,
>>>>> +         if (false == commit_planes_to_stream(dm,
>>>>> +                                                 dm->dc,
>>>>>                                                           
>>>>> plane_states_constructed,
>>>>>                                                           planes_count,
>>>>>                                                           acrtc_state,
>>>>> @@ -4950,7 +5003,9 @@ static void amdgpu_dm_atomic_commit_tail(struct 
>>>>> drm_atomic_state *state)
>>>>>       
>>>>>           if (dc_state) {
>>>>>                   dm_enable_per_frame_crtc_master_sync(dc_state);
>>>>> +         mutex_lock(&dm->dc_lock);
>>>>>                   WARN_ON(!dc_commit_state(dm->dc, dc_state));
>>>>> +         mutex_unlock(&dm->dc_lock);
>>>>>           }
>>>>>       
>>>>>           for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
>>>>> @@ -5012,6 +5067,7 @@ static void amdgpu_dm_atomic_commit_tail(struct 
>>>>> drm_atomic_state *state)
>>>>>       
>>>>>                   /*TODO How it works with MPO ?*/
>>>>>                   if (!commit_planes_to_stream(
>>>>> +                         dm,
>>>>>                                   dm->dc,
>>>>>                                   status->plane_states,
>>>>>                                   status->plane_count,
>>>>> @@ -5904,6 +5960,13 @@ static int amdgpu_dm_atomic_check(struct 
>>>>> drm_device *dev,
>>>>>                           ret = -EINVAL;
>>>>>                           goto fail;
>>>>>                   }
>>>>> + } else if (state->legacy_cursor_update) {
>>>>> +         /*
>>>>> +          * This is a fast cursor update coming from the plane update
>>>>> +          * helper, check if it can be done asynchronously for better
>>>>> +          * performance.
>>>>> +          */
>>>>> +         state->async_update = !drm_atomic_helper_async_check(dev, 
>>>>> state);
>>>>>           }
>>>>>       
>>>>>           /* Must be success */
>>>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h 
>>>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> index 4326dc256491..25bb91ee80ba 100644
>>>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
>>>>> @@ -134,6 +134,14 @@ struct amdgpu_display_manager {
>>>>>       
>>>>>           struct drm_modeset_lock atomic_obj_lock;
>>>>>       
>>>>> + /**
>>>>> +  * @dc_lock:
>>>>> +  *
>>>>> +  * Guards access to DC functions that can issue register write
>>>>> +  * sequences.
>>>>> +  */
>>>>> + struct mutex dc_lock;
>>>>> +
>>>>>           /**
>>>>>            * @irq_handler_list_low_tab:
>>>>>            *
>>
> 

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to