tegra: dc: Support memory bandwidth management

Michał Mirosław Thu, 18 Mar 2021 02:34:00 -0700

On Wed, Mar 17, 2021 at 09:57:33PM +0300, Dmitry Osipenko wrote:
[...]
> --- a/drivers/gpu/drm/tegra/dc.c
> +++ b/drivers/gpu/drm/tegra/dc.c
> @@ -8,6 +8,7 @@
>  #include <linux/debugfs.h>
>  #include <linux/delay.h>
>  #include <linux/iommu.h>
> +#include <linux/interconnect.h>
>  #include <linux/module.h>
>  #include <linux/of_device.h>
>  #include <linux/pm_runtime.h>
> @@ -618,6 +619,9 @@ static int tegra_plane_atomic_check(struct drm_plane 
> *plane,
>       struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
>       int err;
>  
> +     plane_state->peak_memory_bandwidth = 0;
> +     plane_state->avg_memory_bandwidth = 0;
> +
>       /* no need for further checks if the plane is being disabled */
>       if (!new_plane_state->crtc)
>               return 0;
> @@ -808,6 +812,12 @@ static struct drm_plane 
> *tegra_primary_plane_create(struct drm_device *drm,
>       formats = dc->soc->primary_formats;
>       modifiers = dc->soc->modifiers;
>  
> +     err = tegra_plane_interconnect_init(plane);
> +     if (err) {
> +             kfree(plane);
> +             return ERR_PTR(err);
> +     }
> +
>       err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
>                                      &tegra_plane_funcs, formats,
>                                      num_formats, modifiers, type, NULL);
> @@ -841,9 +851,13 @@ static int tegra_cursor_atomic_check(struct drm_plane 
> *plane,
>  {
>       struct drm_plane_state *new_plane_state = 
> drm_atomic_get_new_plane_state(state,
>                                                                               
>  plane);
> +     struct tegra_plane_state *plane_state = 
> to_tegra_plane_state(new_plane_state);
>       struct tegra_plane *tegra = to_tegra_plane(plane);
>       int err;
>  
> +     plane_state->peak_memory_bandwidth = 0;
> +     plane_state->avg_memory_bandwidth = 0;
> +
>       /* no need for further checks if the plane is being disabled */
>       if (!new_plane_state->crtc)
>               return 0;
> @@ -985,6 +999,12 @@ static struct drm_plane 
> *tegra_dc_cursor_plane_create(struct drm_device *drm,
>       num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
>       formats = tegra_cursor_plane_formats;
>  
> +     err = tegra_plane_interconnect_init(plane);
> +     if (err) {
> +             kfree(plane);
> +             return ERR_PTR(err);
> +     }
> +
>       err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
>                                      &tegra_plane_funcs, formats,
>                                      num_formats, NULL,
> @@ -1099,6 +1119,12 @@ static struct drm_plane 
> *tegra_dc_overlay_plane_create(struct drm_device *drm,
>       num_formats = dc->soc->num_overlay_formats;
>       formats = dc->soc->overlay_formats;
>  
> +     err = tegra_plane_interconnect_init(plane);
> +     if (err) {
> +             kfree(plane);
> +             return ERR_PTR(err);
> +     }
> +
>       if (!cursor)
>               type = DRM_PLANE_TYPE_OVERLAY;
>       else
> @@ -1216,6 +1242,7 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
>  {
>       struct tegra_dc_state *state = to_dc_state(crtc->state);
>       struct tegra_dc_state *copy;
> +     unsigned int i;
>  
>       copy = kmalloc(sizeof(*copy), GFP_KERNEL);
>       if (!copy)
> @@ -1227,6 +1254,9 @@ tegra_crtc_atomic_duplicate_state(struct drm_crtc *crtc)
>       copy->div = state->div;
>       copy->planes = state->planes;
>  
> +     for (i = 0; i < ARRAY_SIZE(state->plane_peak_bw); i++)
> +             copy->plane_peak_bw[i] = state->plane_peak_bw[i];
> +
>       return &copy->base;
>  }
>  
> @@ -1753,6 +1783,106 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, 
> unsigned long timeout)
>       return -ETIMEDOUT;
>  }
>  
> +static void
> +tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
> +                                struct drm_atomic_state *state,
> +                                bool prepare_bandwidth_transition)
> +{
> +     const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
> +     const struct tegra_dc_state *old_dc_state, *new_dc_state;
> +     u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw;
> +     const struct drm_plane_state *old_plane_state;
> +     const struct drm_crtc_state *old_crtc_state;
> +     struct tegra_dc_window window, old_window;
> +     struct tegra_dc *dc = to_tegra_dc(crtc);
> +     struct tegra_plane *tegra;
> +     struct drm_plane *plane;
> +
> +     if (dc->soc->has_nvdisplay)
> +             return;
> +
> +     old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
> +     old_dc_state = to_const_dc_state(old_crtc_state);
> +     new_dc_state = to_const_dc_state(crtc->state);
> +
> +     if (!crtc->state->active) {
> +             if (!old_crtc_state->active)
> +                     return;
> +
> +             /*
> +              * When CRTC is disabled on DPMS, the state of attached planes
> +              * is kept unchanged. Hence we need to enforce removal of the
> +              * bandwidths from the ICC paths.
> +              */
> +             drm_atomic_crtc_for_each_plane(plane, crtc) {
> +                     tegra = to_tegra_plane(plane);
> +
> +                     icc_set_bw(tegra->icc_mem, 0, 0);
> +                     icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
> +             }
> +
> +             return;
> +     }
> +
> +     for_each_old_plane_in_state(old_crtc_state->state, plane,
> +                                 old_plane_state, i) {
> +             old_tegra_state = to_const_tegra_plane_state(old_plane_state);
> +             new_tegra_state = to_const_tegra_plane_state(plane->state);
> +             tegra = to_tegra_plane(plane);
> +
> +             /*
> +              * We're iterating over the global atomic state and it contains
> +              * planes from another CRTC, hence we need to filter out the
> +              * planes unrelated to this CRTC.
> +              */
> +             if (tegra->dc != dc)
> +                     continue;
> +
> +             new_avg_bw = new_tegra_state->avg_memory_bandwidth;
> +             old_avg_bw = old_tegra_state->avg_memory_bandwidth;
> +
> +             new_peak_bw = new_dc_state->plane_peak_bw[tegra->index];
> +             old_peak_bw = old_dc_state->plane_peak_bw[tegra->index];
> +
> +             /*
> +              * See the comment related to !crtc->state->active above,
> +              * which explains why bandwidths need to be updated when
> +              * CRTC is turning ON.
> +              */
> +             if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
> +                 old_crtc_state->active)
> +                     continue;
> +
> +             window.src.h = drm_rect_height(&plane->state->src) >> 16;
> +             window.dst.h = drm_rect_height(&plane->state->dst);
> +
> +             old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
> +             old_window.dst.h = drm_rect_height(&old_plane_state->dst);
> +
> +             /*
> +              * During the preparation phase (atomic_begin), the memory
> +              * freq should go high before the DC changes are committed
> +              * if bandwidth requirement goes up, otherwise memory freq
> +              * should stay high if BW requirement goes down.  The
> +              * opposite applies to the completion phase (post_commit).
> +              */
> +             if (prepare_bandwidth_transition) {
> +                     new_avg_bw = max(old_avg_bw, new_avg_bw);
> +                     new_peak_bw = max(old_peak_bw, new_peak_bw);
> +
> +                     if (tegra_plane_use_vertical_filtering(tegra, 
> &old_window))
> +                             window = old_window;
> +             }
> +
> +             icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
> +
> +             if (tegra_plane_use_vertical_filtering(tegra, &window))
> +                     icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, 
> new_peak_bw);
> +             else
> +                     icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
> +     }
> +}
> +
>  static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
>                                     struct drm_atomic_state *state)
>  {
> @@ -1934,6 +2064,8 @@ static void tegra_crtc_atomic_begin(struct drm_crtc 
> *crtc,
>  {
>       unsigned long flags;
>  
> +     tegra_crtc_update_memory_bandwidth(crtc, state, true);
> +
>       if (crtc->state->event) {
>               spin_lock_irqsave(&crtc->dev->event_lock, flags);
>  
> @@ -1966,7 +2098,215 @@ static void tegra_crtc_atomic_flush(struct drm_crtc 
> *crtc,
>       value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
>  }
>  
> +static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
> +{
> +     const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
> +     const struct drm_format_info *fmt = state->fb->format;
> +     unsigned int src_w = drm_rect_width(&state->src) >> 16;
> +     unsigned int dst_w = drm_rect_width(&state->dst);
> +
> +     if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
> +             return false;
> +
> +     if (soc->supports_cursor)
> +             return true;
> +
> +     if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
> +             return false;
> +
> +     return true;
> +}
> +
> +static unsigned long
> +tegra_plane_overlap_mask(struct drm_crtc_state *state,
> +                      const struct drm_plane_state *plane_state)
> +{
> +     const struct drm_plane_state *other_state;
> +     const struct tegra_plane *tegra;
> +     unsigned long overlap_mask = 0;
> +     struct drm_plane *plane;
> +     struct drm_rect rect;
> +
> +     if (!plane_state->visible || !plane_state->fb)
> +             return 0;
> +
> +     /*
> +      * Data-prefetch FIFO will easily help to overcome temporal memory
> +      * pressure if other plane overlaps with the cursor plane.
> +      */
> +     if (tegra_plane_is_cursor(plane_state))
> +             return 0;
> +
> +     drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
> +             rect = plane_state->dst;
> +
> +             tegra = to_tegra_plane(other_state->plane);
> +
> +             if (!other_state->visible || !other_state->fb)
> +                     continue;
> +
> +             /*
> +              * Ignore cursor plane overlaps because it's not practical to
> +              * assume that it contributes to the bandwidth in overlapping
> +              * area if window width is small.
> +              */
> +             if (tegra_plane_is_cursor(other_state))
> +                     continue;
> +
> +             if (drm_rect_intersect(&rect, &other_state->dst))
> +                     overlap_mask |= BIT(tegra->index);
> +     }
> +
> +     return overlap_mask;
> +}
> +
> +static struct drm_plane *
> +tegra_crtc_get_plane_by_index(struct drm_crtc *crtc, unsigned int index)
> +{
> +     struct drm_plane *plane;
> +
> +     drm_atomic_crtc_for_each_plane(plane, crtc) {
> +             if (to_tegra_plane(plane)->index == index)
> +                     return plane;
> +     }
> +
> +     return NULL;
> +}
> +
> +static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
> +                                              struct drm_atomic_state *state)
> +{
> +     ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
> +     u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
> +     bool all_planes_overlap_simultaneously = true;
> +     const struct tegra_plane_state *tegra_state;
> +     const struct drm_plane_state *plane_state;
> +     const struct tegra_dc_state *old_dc_state;
> +     struct tegra_dc *dc = to_tegra_dc(crtc);
> +     const struct drm_crtc_state *old_state;
> +     struct tegra_dc_state *new_dc_state;
> +     struct drm_crtc_state *new_state;
> +     struct tegra_plane *tegra;
> +     struct drm_plane *plane;
> +     u32 i, k, overlap_bw;
> +
> +     /*
> +      * The nv-display uses shared planes.  The algorithm below assumes
> +      * maximum 3 planes per-CRTC, this assumption isn't applicable to
> +      * the nv-display.  Note that T124 support has additional windows,
> +      * but currently they aren't supported by the driver.
> +      */
> +     if (dc->soc->has_nvdisplay)
> +             return 0;
> +
> +     new_state = drm_atomic_get_new_crtc_state(state, crtc);
> +     new_dc_state = to_dc_state(new_state);
> +
> +     /*
> +      * For overlapping planes pixel's data is fetched for each plane at
> +      * the same time, hence bandwidths are accumulated in this case.
> +      * This needs to be taken into account for calculating total bandwidth
> +      * consumed by all planes.
> +      *
> +      * Here we get the overlapping state of each plane, which is a
> +      * bitmask of plane indices telling with what planes there is an
> +      * overlap. Note that bitmask[plane] includes BIT(plane) in order
> +      * to make further code nicer and simpler.
> +      */
> +     drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, 
> new_state) {
> +             tegra_state = to_const_tegra_plane_state(plane_state);
> +             tegra = to_tegra_plane(plane);
> +
> +             if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
> +                     return -EINVAL;
> +
> +             plane_peak_bw[tegra->index] = 
> tegra_state->peak_memory_bandwidth;
> +             mask = tegra_plane_overlap_mask(new_state, plane_state);
> +             overlap_mask[tegra->index] = mask;
> +
> +             if (hweight_long(mask) != 3)
> +                     all_planes_overlap_simultaneously = false;
> +     }
> +
> +     old_state = drm_atomic_get_old_crtc_state(state, crtc);
> +     old_dc_state = to_const_dc_state(old_state);
> +
> +     /*
> +      * Then we calculate maximum bandwidth of each plane state.
> +      * The bandwidth includes the plane BW + BW of the "simultaneously"
> +      * overlapping planes, where "simultaneously" means areas where DC
> +      * fetches from the planes simultaneously during the scan-out process.
> +      *
> +      * For example, if plane A overlaps with planes B and C, but B and C
> +      * don't overlap, then the peak bandwidth will be either in area where
> +      * A-and-B or A-and-C planes overlap.
> +      *
> +      * The plane_peak_bw[] contains peak memory bandwidth values of
> +      * each plane, this information is needed by interconnect provider
> +      * in order to set up latency allowance based on the peak BW, see
> +      * tegra_crtc_update_memory_bandwidth().
> +      */
> +     for (i = 0; i < ARRAY_SIZE(plane_peak_bw); i++) {
> +             overlap_bw = 0;
> +
> +             for_each_set_bit(k, &overlap_mask[i], 3) {
> +                     if (k == i)
> +                             continue;
> +
> +                     if (all_planes_overlap_simultaneously)
> +                             overlap_bw += plane_peak_bw[k];
> +                     else
> +                             overlap_bw = max(overlap_bw, plane_peak_bw[k]);
> +             }
> +
> +             new_dc_state->plane_peak_bw[i] = plane_peak_bw[i] + overlap_bw;
> +
> +             /*
> +              * If plane's peak bandwidth changed (for example plane isn't
> +              * overlapped anymore) and plane isn't in the atomic state,
> +              * then add plane to the state in order to have the bandwidth
> +              * updated.
> +              */
> +             if (old_dc_state->plane_peak_bw[i] !=
> +                 new_dc_state->plane_peak_bw[i]) {
> +                     plane = tegra_crtc_get_plane_by_index(crtc, i);
> +                     if (!plane)
> +                             continue;
> +
> +                     plane_state = drm_atomic_get_plane_state(state, plane);
> +                     if (IS_ERR(plane_state))
> +                             return PTR_ERR(plane_state);
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
> +                                struct drm_atomic_state *state)
> +{
> +     int err;
> +
> +     err = tegra_crtc_calculate_memory_bandwidth(crtc, state);
> +     if (err)
> +             return err;
> +
> +     return 0;
> +}
> +
> +void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
> +                                struct drm_atomic_state *state)
> +{
> +     /*
> +      * Display bandwidth is allowed to go down only once hardware state
> +      * is known to be armed, i.e. state was committed and VBLANK event
> +      * received.
> +      */
> +     tegra_crtc_update_memory_bandwidth(crtc, state, false);
> +}
> +
>  static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
> +     .atomic_check = tegra_crtc_atomic_check,
>       .atomic_begin = tegra_crtc_atomic_begin,
>       .atomic_flush = tegra_crtc_atomic_flush,
>       .atomic_enable = tegra_crtc_atomic_enable,
> @@ -2257,7 +2597,9 @@ static const struct tegra_dc_soc_info 
> tegra20_dc_soc_info = {
>       .overlay_formats = tegra20_overlay_formats,
>       .modifiers = tegra20_modifiers,
>       .has_win_a_without_filters = true,
> +     .has_win_b_vfilter_mem_client = true,
>       .has_win_c_without_vert_filter = true,
> +     .plane_tiled_memory_bandwidth_x2 = false,
>  };
>  
>  static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
> @@ -2276,7 +2618,9 @@ static const struct tegra_dc_soc_info 
> tegra30_dc_soc_info = {
>       .overlay_formats = tegra20_overlay_formats,
>       .modifiers = tegra20_modifiers,
>       .has_win_a_without_filters = false,
> +     .has_win_b_vfilter_mem_client = true,
>       .has_win_c_without_vert_filter = false,
> +     .plane_tiled_memory_bandwidth_x2 = true,
>  };
>  
>  static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
> @@ -2295,7 +2639,9 @@ static const struct tegra_dc_soc_info 
> tegra114_dc_soc_info = {
>       .overlay_formats = tegra114_overlay_formats,
>       .modifiers = tegra20_modifiers,
>       .has_win_a_without_filters = false,
> +     .has_win_b_vfilter_mem_client = false,
>       .has_win_c_without_vert_filter = false,
> +     .plane_tiled_memory_bandwidth_x2 = true,
>  };
>  
>  static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
> @@ -2314,7 +2660,9 @@ static const struct tegra_dc_soc_info 
> tegra124_dc_soc_info = {
>       .overlay_formats = tegra124_overlay_formats,
>       .modifiers = tegra124_modifiers,
>       .has_win_a_without_filters = false,
> +     .has_win_b_vfilter_mem_client = false,
>       .has_win_c_without_vert_filter = false,
> +     .plane_tiled_memory_bandwidth_x2 = false,
>  };
>  
>  static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
> @@ -2333,7 +2681,9 @@ static const struct tegra_dc_soc_info 
> tegra210_dc_soc_info = {
>       .overlay_formats = tegra114_overlay_formats,
>       .modifiers = tegra124_modifiers,
>       .has_win_a_without_filters = false,
> +     .has_win_b_vfilter_mem_client = false,
>       .has_win_c_without_vert_filter = false,
> +     .plane_tiled_memory_bandwidth_x2 = false,
>  };
>  
>  static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
> @@ -2382,6 +2732,7 @@ static const struct tegra_dc_soc_info 
> tegra186_dc_soc_info = {
>       .has_nvdisplay = true,
>       .wgrps = tegra186_dc_wgrps,
>       .num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
> +     .plane_tiled_memory_bandwidth_x2 = false,
>  };
>  
>  static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
> @@ -2430,6 +2781,7 @@ static const struct tegra_dc_soc_info 
> tegra194_dc_soc_info = {
>       .has_nvdisplay = true,
>       .wgrps = tegra194_dc_wgrps,
>       .num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
> +     .plane_tiled_memory_bandwidth_x2 = false,
>  };


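These soc_info structures are globals with static storage duration, so any
field omitted from the initializer is already zero-initialized (i.e. .x = false
by default); I'm not sure the explicit "= false" entries add much value.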

Reviewed-by: Michał Mirosław <[email protected]>

Re: [PATCH v16 1/2] drm/tegra: dc: Support memory bandwidth management

Reply via email to