On Sun, 23 Nov 2025, Vinod Govindapillai <[email protected]> wrote:
> One of the FBC instances can utilize the reserved area of SoC
> level cache for the fbc transactions to benefit reduced memory
> system power especially in idle scenarios. Reserved area of the
> system cache can be assigned to an fbc instance by configuring
> the cacheability configuration register with offset of the
> compressed frame buffer in stolen memoty of that fbc. There is
> a limit to this reserved area which is programmable and for
> xe3p_lpd the limit is defined as 2MB.
>
> v2: - better to track fbc sys cache usage from intel_display level,
>       sanitize the cacheability config register on probe (Matt)
>     - limit this for integrated graphics solutions, confirmed that
>       no default value set for cache range by hw (Gustavo)
>
> v3: - changes related to the use of fbc substruct in intel_display
>     - use intel_de_write() instead of intel_rmw() by hardcoding the
>       default value fields
>

I think overall the implementation feels a bit overwhelming. I mean
there are so many functions, so many checks, to the point of being
excessive.

Some comments inline.

> Bspec: 68881, 74722
> Signed-off-by: Vinod Govindapillai <[email protected]>
> ---
>  .../gpu/drm/i915/display/intel_display_core.h |  3 +
>  .../drm/i915/display/intel_display_device.h   |  1 +
>  drivers/gpu/drm/i915/display/intel_fbc.c      | 93 +++++++++++++++++++
>  drivers/gpu/drm/i915/display/intel_fbc_regs.h | 10 ++
>  4 files changed, 107 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h 
> b/drivers/gpu/drm/i915/display/intel_display_core.h
> index 58325f530670..f557c9293d33 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_core.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_core.h
> @@ -400,6 +400,9 @@ struct intel_display {
>  
>       struct {
>               struct intel_fbc *instances[I915_MAX_FBCS];
> +
> +             /* xe3p_lpd+ : FBC instance utlizing the system cache */

Please no space before :, *utilizing

> +             enum intel_fbc_id sys_cache_id;
>       } fbc;
>  
>       struct {
> diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h 
> b/drivers/gpu/drm/i915/display/intel_display_device.h
> index b559ef43d547..b74cb69ccc85 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_device.h
> +++ b/drivers/gpu/drm/i915/display/intel_display_device.h
> @@ -173,6 +173,7 @@ struct intel_display_platforms {
>  #define HAS_DSC_MST(__display)               (DISPLAY_VER(__display) >= 12 
> && HAS_DSC(__display))
>  #define HAS_FBC(__display)           
> (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0)
>  #define HAS_FBC_DIRTY_RECT(__display)        (DISPLAY_VER(__display) >= 30)
> +#define HAS_FBC_SYS_CACHE(__display) (DISPLAY_VER(__display) >= 35 && 
> !(__display)->platform.dgfx)
>  #define HAS_FPGA_DBG_UNCLAIMED(__display)    
> (DISPLAY_INFO(__display)->has_fpga_dbg)
>  #define HAS_FW_BLC(__display)                (DISPLAY_VER(__display) >= 3)
>  #define HAS_GMBUS_BURST_READ(__display)      (DISPLAY_VER(__display) >= 10 
> || (__display)->platform.kabylake)
> diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c 
> b/drivers/gpu/drm/i915/display/intel_fbc.c
> index dcdfcff80de3..d7e913792518 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> @@ -71,6 +71,10 @@
>       for_each_fbc_id((__display), (__fbc_id)) \
>               for_each_if((__fbc) = (__display)->fbc.instances[(__fbc_id)])
>  
> +#define SYS_CACHE_FBC_INSTANCE_NONE                  I915_MAX_FBCS
> +#define IS_SYS_CACHE_FBC_INSTANCE_NONE(__display)    
> ((__display)->fbc.sys_cache_id == SYS_CACHE_FBC_INSTANCE_NONE)

The only user of this has !IS_SYS_CACHE_FBC_INSTANCE_NONE(display)
i.e. "if not none".

Would be more useful to check if it's "set" or "valid", and avoid the
double negative. And use a shorter name. Maybe a static inline function.

But I'm not even sure the display->fbc.sys_cache_id needs to be
abstracted away. It's not a complicated thing, and, more importantly,
it's all within the same file. If outside access was needed, yes,
abstract, but here, not convinced.

Maybe:

        if (fbc_sys_cache_id_valid(display->fbc.sys_cache_id))

I also don't think *all* of these functions should be prefixed with
xe3p_lpd_ because that's a monster, and makes all of this difficult to
read.

> +#define IS_SYS_CACHE_FBC_INSTANCE_EQUALS(__display, id)      
> ((__display)->fbc.sys_cache_id == (id))

I think this feels cumbersome. Why not just check

        if (display->fbc.sys_cache_id == id)

inline, and it's obvious?

> +
>  struct intel_fbc_funcs {
>       void (*activate)(struct intel_fbc *fbc);
>       void (*deactivate)(struct intel_fbc *fbc);
> @@ -941,6 +945,79 @@ static void intel_fbc_program_workarounds(struct 
> intel_fbc *fbc)
>               fbc_compressor_clkgate_disable_wa(fbc, true);
>  }
>  
> +static void xe3p_lpd_fbc_set_sys_cache_fbc_id(struct intel_display *display,
> +                                           enum intel_fbc_id fbc_id)
> +{
> +     display->fbc.sys_cache_id = fbc_id;
> +}

Again, I'm not sure if this function is really needed. Just inline?

> +
> +static void xe3p_lpd_fbc_commit_sys_cache_usage(struct intel_display 
> *display,
> +                                             u32 reg)
> +{
> +     intel_de_write(display, XE3P_LPD_FBC_SYS_CACHE_USAGE_CFG, reg);
> +}

Not sure if this is needed. Just inline?

> +
> +static int xe3p_lpd_fbc_get_cache_limit(void)
> +{
> +     /* Default 2MB for xe3p_lpd */
> +     return 2 * 1024 * 1024;
> +}

Ditto. Especially odd how this multiplies and the user the divides by 64
* 1024.

> +
> +static void xe3p_lpd_fbc_clear_sys_cache_usage(struct intel_display *display)
> +{
> +     /* Clear all the fields except the default fields */
> +     u32 default_fields = FBC_SYS_CACHE_READ_ENABLE;
> +
> +     xe3p_lpd_fbc_commit_sys_cache_usage(display, default_fields);
> +
> +     /* Mark that no FBC instance utilize the system cache */
> +     xe3p_lpd_fbc_set_sys_cache_fbc_id(display, SYS_CACHE_FBC_INSTANCE_NONE);
> +}

My point above is that this function only calls wrappers and really does
nothing itself. It's too many layers for a simple thing.

fbc_sys_cache_disable()?

> +
> +static void xe3p_lpd_fbc_set_sys_cache_usage(const struct intel_fbc *fbc)
> +{
> +     struct intel_display *display = fbc->display;
> +     /* limit to be configured to the register in 64k byte chunks */
> +     int range = xe3p_lpd_fbc_get_cache_limit() / (64 * 1024);
> +     /* offset to be configured to the register in 4K byte chunks */
> +     int offset = i915_gem_stolen_node_offset(fbc->compressed_fb) / (4 * 
> 1024);
> +     /* Cache read enable is enabled by default */
> +     u32 usage = FBC_SYS_CACHE_TAG_USE_RES_SPACE |
> +                 FBC_SYS_CACHEABLE_RANGE(range) |
> +                 FBC_SYS_CACHE_START_BASE(offset) |
> +                 FBC_SYS_CACHE_READ_ENABLE;
> +
> +     lockdep_assert_held(&fbc->lock);
> +
> +     xe3p_lpd_fbc_commit_sys_cache_usage(display, usage);
> +
> +     xe3p_lpd_fbc_set_sys_cache_fbc_id(display, fbc->id);
> +}

Ditto.

fbc_sys_cache_enable()?

> +
> +static void xe3p_lpd_fbc_update_sys_cache_usage(const struct intel_fbc *fbc,
> +                                             bool set)
> +{
> +     struct intel_display *display = fbc->display;
> +
> +     lockdep_assert_held(&fbc->lock);
> +
> +     /* system cache for fbc already reserved */
> +     if (set && !IS_SYS_CACHE_FBC_INSTANCE_NONE(display))
> +             return;
> +
> +     /* cannot clear if "fbc" did not reserve the cache */
> +     if (!set && !IS_SYS_CACHE_FBC_INSTANCE_EQUALS(display, fbc->id))
> +             return;
> +
> +     if (set)
> +             xe3p_lpd_fbc_set_sys_cache_usage(fbc);
> +     else
> +             xe3p_lpd_fbc_clear_sys_cache_usage(display);
> +
> +     drm_dbg_kms(display->drm, "System cacheability usage for FBC[%d] %s\n",
> +                 fbc->id, set ? "configured" : "cleared");
> +}

Most of this function is two separate paths based on the parameter. I
think it would benefit from actually being two separate functions. So
why not just merge this with xe3p_lpd_fbc_set_sys_cache_usage() and
xe3p_lpd_fbc_clear_sys_cache_usage()?

> +
>  static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc)
>  {
>       if (WARN_ON(intel_fbc_hw_is_active(fbc)))
> @@ -967,6 +1044,9 @@ void intel_fbc_cleanup(struct intel_display *display)
>  
>               kfree(fbc);
>       }
> +
> +     if (HAS_FBC_SYS_CACHE(display))
> +             xe3p_lpd_fbc_clear_sys_cache_usage(display);

I don't think this should check for HAS_FBC_SYS_CACHE(). I think
internally the function being called should check if sys cache has been
set. And make sure it's only set on where it's available.

>  }
>  
>  static bool i8xx_fbc_stride_is_valid(const struct intel_plane_state 
> *plane_state)
> @@ -1780,6 +1860,9 @@ static void __intel_fbc_disable(struct intel_fbc *fbc)
>  
>       __intel_fbc_cleanup_cfb(fbc);
>  
> +     if (HAS_FBC_SYS_CACHE(display))
> +             xe3p_lpd_fbc_update_sys_cache_usage(fbc, false);
> +

Ditto. I'm also not sure why some places call the version with params,
and some others the version without params.

>       /* wa_18038517565 Enable DPFC clock gating after FBC disable */
>       if (display->platform.dg2 || DISPLAY_VER(display) >= 14)
>               fbc_compressor_clkgate_disable_wa(fbc, false);
> @@ -1972,6 +2055,9 @@ static void __intel_fbc_enable(struct 
> intel_atomic_state *state,
>  
>       intel_fbc_program_workarounds(fbc);
>       intel_fbc_program_cfb(fbc);
> +
> +     if (HAS_FBC_SYS_CACHE(display))
> +             xe3p_lpd_fbc_update_sys_cache_usage(fbc, true);

xe3p_lpd_fbc_update_sys_cache_usage() is the function that should check
for HAS_FBC_SYS_CACHE() in one place.

Well, maybe it should be renamed fbc_sys_cache_enable().

>  }
>  
>  /**
> @@ -2212,6 +2298,9 @@ void intel_fbc_init(struct intel_display *display)
>  
>       for_each_fbc_id(display, fbc_id)
>               display->fbc.instances[fbc_id] = intel_fbc_create(display, 
> fbc_id);
> +
> +     /* Mark that no FBC instance is using the system cache */
> +     xe3p_lpd_fbc_set_sys_cache_fbc_id(display, SYS_CACHE_FBC_INSTANCE_NONE);
>  }
>  
>  /**
> @@ -2231,6 +2320,10 @@ void intel_fbc_sanitize(struct intel_display *display)
>               if (intel_fbc_hw_is_active(fbc))
>                       intel_fbc_hw_deactivate(fbc);
>       }
> +
> +     /* Ensure the sys cache usage register gets cleared */
> +     if (HAS_FBC_SYS_CACHE(display))
> +             xe3p_lpd_fbc_clear_sys_cache_usage(display);

Ditto about checking for valid sys cache inside, not
HAS_FBC_SYS_CACHE().

>  }
>  
>  static int intel_fbc_debugfs_status_show(struct seq_file *m, void *unused)
> diff --git a/drivers/gpu/drm/i915/display/intel_fbc_regs.h 
> b/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> index b1d0161a3196..d2d889fa4bed 100644
> --- a/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> +++ b/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> @@ -126,4 +126,14 @@
>  #define   FBC_REND_NUKE                      REG_BIT(2)
>  #define   FBC_REND_CACHE_CLEAN               REG_BIT(1)
>  
> +#define XE3P_LPD_FBC_SYS_CACHE_USAGE_CFG     _MMIO(0x1344E0)
> +#define   FBC_SYS_CACHE_START_BASE_MASK              REG_GENMASK(31, 16)
> +#define   FBC_SYS_CACHE_START_BASE(base)     
> REG_FIELD_PREP(FBC_SYS_CACHE_START_BASE_MASK, (base))
> +#define   FBC_SYS_CACHEABLE_RANGE_MASK               REG_GENMASK(15, 4)
> +#define   FBC_SYS_CACHEABLE_RANGE(range)     
> REG_FIELD_PREP(FBC_SYS_CACHEABLE_RANGE_MASK, (range))
> +#define   FBC_SYS_CACHE_TAG_MASK             REG_GENMASK(3, 2)
> +#define   FBC_SYS_CACHE_TAG_DONT_CACHE               
> REG_FIELD_PREP(FBC_SYS_CACHE_TAG_MASK, 0)
> +#define   FBC_SYS_CACHE_TAG_USE_RES_SPACE    
> REG_FIELD_PREP(FBC_SYS_CACHE_TAG_MASK, 3)
> +#define   FBC_SYS_CACHE_READ_ENABLE          REG_BIT(0)
> +
>  #endif /* __INTEL_FBC_REGS__ */

-- 
Jani Nikula, Intel

Reply via email to