Hi Jani,

Thanks for the input.. replies inline...

On Mon, 2025-11-24 at 13:27 +0200, Jani Nikula wrote:
> On Sun, 23 Nov 2025, Vinod Govindapillai
> <[email protected]> wrote:
> > One of the FBC instances can utilize the reserved area of SoC
> > level cache for the fbc transactions to benefit reduced memory
> > system power especially in idle scenarios. Reserved area of the
> > system cache can be assigned to an fbc instance by configuring
> > the cacheability configuration register with offset of the
> > compressed frame buffer in stolen memoty of that fbc. There is
> > a limit to this reserved area which is programmable and for
> > xe3p_lpd the limit is defined as 2MB.
> > 
> > v2: - better to track fbc sys cache usage from intel_display level,
> >       sanitize the cacheability config register on probe (Matt)
> >     - limit this for integrated graphics solutions, confirmed that
> >       no default value set for cache range by hw (Gustavo)
> > 
> > v3: - changes related to the use of fbc substruct in intel_display
> >     - use intel_de_write() instead of intel_rmw() by hardcoding the
> >       default value fields
> > 
> 
> I think overall the implementation feels a bit overwhelming. I mean
> there are so many functions, so many checks, to the point of being
> excessive.
> 
> Some comments inline.
> 
> > Bspec: 68881, 74722
> > Signed-off-by: Vinod Govindapillai <[email protected]>
> > ---
> >  .../gpu/drm/i915/display/intel_display_core.h |  3 +
> >  .../drm/i915/display/intel_display_device.h   |  1 +
> >  drivers/gpu/drm/i915/display/intel_fbc.c      | 93
> > +++++++++++++++++++
> >  drivers/gpu/drm/i915/display/intel_fbc_regs.h | 10 ++
> >  4 files changed, 107 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h
> > b/drivers/gpu/drm/i915/display/intel_display_core.h
> > index 58325f530670..f557c9293d33 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_core.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_core.h
> > @@ -400,6 +400,9 @@ struct intel_display {
> >  
> >     struct {
> >             struct intel_fbc *instances[I915_MAX_FBCS];
> > +
> > +           /* xe3p_lpd+ : FBC instance utlizing the system
> > cache */
> 
> Please no space before :, *utilizing
> 
> > +           enum intel_fbc_id sys_cache_id;
> >     } fbc;
> >  
> >     struct {
> > diff --git a/drivers/gpu/drm/i915/display/intel_display_device.h
> > b/drivers/gpu/drm/i915/display/intel_display_device.h
> > index b559ef43d547..b74cb69ccc85 100644
> > --- a/drivers/gpu/drm/i915/display/intel_display_device.h
> > +++ b/drivers/gpu/drm/i915/display/intel_display_device.h
> > @@ -173,6 +173,7 @@ struct intel_display_platforms {
> >  #define HAS_DSC_MST(__display)             (DISPLAY_VER(__display) >=
> > 12 && HAS_DSC(__display))
> >  #define
> > HAS_FBC(__display)          (DISPLAY_RUNTIME_INFO(__display)->fbc_mask != 0)
> >  #define HAS_FBC_DIRTY_RECT(__display)      (DISPLAY_VER(__display) >=
> > 30)
> > +#define HAS_FBC_SYS_CACHE(__display)       (DISPLAY_VER(__display) >=
> > 35 && !(__display)->platform.dgfx)
> >  #define
> > HAS_FPGA_DBG_UNCLAIMED(__display)   (DISPLAY_INFO(__display)->has_fpga_dbg)
> >  #define HAS_FW_BLC(__display)              (DISPLAY_VER(__display) >=
> > 3)
> >  #define
> > HAS_GMBUS_BURST_READ(__display)     (DISPLAY_VER(__display) >= 10 || 
> > (__display)->platform.kabylake)
> > diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c
> > b/drivers/gpu/drm/i915/display/intel_fbc.c
> > index dcdfcff80de3..d7e913792518 100644
> > --- a/drivers/gpu/drm/i915/display/intel_fbc.c
> > +++ b/drivers/gpu/drm/i915/display/intel_fbc.c
> > @@ -71,6 +71,10 @@
> >     for_each_fbc_id((__display), (__fbc_id)) \
> >             for_each_if((__fbc) = (__display)-
> > >fbc.instances[(__fbc_id)])
> >  
> > +#define
> > SYS_CACHE_FBC_INSTANCE_NONE                 I915_MAX_FBCS
> > +#define
> > IS_SYS_CACHE_FBC_INSTANCE_NONE(__display)   
> > ((__display)->fbc.sys_cache_id==SYS_CACHE_FBC_INSTANCE_NONE)
> 
> The only user of this has !IS_SYS_CACHE_FBC_INSTANCE_NONE(display)
> i.e. "if not none".
> 
> Would be more useful to check if it's "set" or "valid", and avoid the
> double negative. And use a shorter name. Maybe a static inline
> function.
> 
> But I'm not even sure the display->fbc.sys_cache_id needs to be
> abstracted away. It's not a complicated thing, and, more importantly,
> it's all within the same file. If outside access was needed, yes,
> abstract, but here, not convinced.
> 
> Maybe:
> 
>       if (fbc_sys_cache_id_valid(display->fbc.sys_cache_id))
> 
> I also don't think *all* of these functions should be prefixed with
> xe3p_lpd_ because that's a monster, and makes all of this difficult
> to
> read.
> 
> > +#define IS_SYS_CACHE_FBC_INSTANCE_EQUALS(__display,
> > id) ((__display)->fbc.sys_cache_id == (id))
> 
> I think this feels cumbersome. Why not just check
> 
>       if (display->fbc.sys_cache_id == id)
> 
> inline, and it's obvious?
> 
> > +
> >  struct intel_fbc_funcs {
> >     void (*activate)(struct intel_fbc *fbc);
> >     void (*deactivate)(struct intel_fbc *fbc);
> > @@ -941,6 +945,79 @@ static void
> > intel_fbc_program_workarounds(struct intel_fbc *fbc)
> >             fbc_compressor_clkgate_disable_wa(fbc, true);
> >  }
> >  
> > +static void xe3p_lpd_fbc_set_sys_cache_fbc_id(struct intel_display
> > *display,
> > +                                         enum intel_fbc_id
> > fbc_id)
> > +{
> > +   display->fbc.sys_cache_id = fbc_id;
> > +}
> 
> Again, I'm not sure if this function is really needed. Just inline?

Ack for the above suggestions. About the function naming, yeah it is
indeed bit complex, but was following the functions names being
followed in this file. Should it be prefixed with nvl /intel or just
fbc?  instead? Any suggestion?

> 
> > +
> > +static void xe3p_lpd_fbc_commit_sys_cache_usage(struct
> > intel_display *display,
> > +                                           u32 reg)
> > +{
> > +   intel_de_write(display, XE3P_LPD_FBC_SYS_CACHE_USAGE_CFG,
> > reg);
> > +}
> 
> Not sure if this is needed. Just inline?

Two paths update this register - setting up the cache for a fbc and
clearing it. Also soon there is going to be a workaround implemented
which we need to disable the read cache enable bit which is set on by
default. So I was thinking of intercept here and apply the wa

if (intel_display_wa(display, xxxx))
                reg &= ~FBC_SYS_CACHE_READ_ENABLE;

> 
> > +
> > +static int xe3p_lpd_fbc_get_cache_limit(void)
> > +{
> > +   /* Default 2MB for xe3p_lpd */
> > +   return 2 * 1024 * 1024;
> > +}
> 
> Ditto. Especially odd how this multiplies and the user the divides by
> 64
> * 1024.

Initially from HAS my understanding was that this was supposed to be
set by a pre-os fw and driver dont modify this field. And this can
change per platform. But from the recent logs fro, these fields are set
as 0 and has to be programmed by the driver. (I am clarifying this with
the HW team). Anyway, this need to be variated per platform. In some
other component such 2MB was hard coded like this. So thought of
adopting that. Any suggestion?


> > +
> > +static void xe3p_lpd_fbc_clear_sys_cache_usage(struct
> > intel_display *display)
> > +{
> > +   /* Clear all the fields except the default fields */
> > +   u32 default_fields = FBC_SYS_CACHE_READ_ENABLE;
> > +
> > +   xe3p_lpd_fbc_commit_sys_cache_usage(display,
> > default_fields);
> > +
> > +   /* Mark that no FBC instance utilize the system cache */
> > +   xe3p_lpd_fbc_set_sys_cache_fbc_id(display,
> > SYS_CACHE_FBC_INSTANCE_NONE);
> > +}
> 
> My point above is that this function only calls wrappers and really
> does
> nothing itself. It's too many layers for a simple thing.
> 
> fbc_sys_cache_disable()?
> 
> > +
> > +static void xe3p_lpd_fbc_set_sys_cache_usage(const struct
> > intel_fbc *fbc)
> > +{
> > +   struct intel_display *display = fbc->display;
> > +   /* limit to be configured to the register in 64k byte
> > chunks */
> > +   int range = xe3p_lpd_fbc_get_cache_limit() / (64 * 1024);
> > +   /* offset to be configured to the register in 4K byte
> > chunks */
> > +   int offset = i915_gem_stolen_node_offset(fbc-
> > >compressed_fb) / (4 * 1024);
> > +   /* Cache read enable is enabled by default */
> > +   u32 usage = FBC_SYS_CACHE_TAG_USE_RES_SPACE |
> > +               FBC_SYS_CACHEABLE_RANGE(range) |
> > +               FBC_SYS_CACHE_START_BASE(offset) |
> > +               FBC_SYS_CACHE_READ_ENABLE;
> > +
> > +   lockdep_assert_held(&fbc->lock);
> > +
> > +   xe3p_lpd_fbc_commit_sys_cache_usage(display, usage);
> > +
> > +   xe3p_lpd_fbc_set_sys_cache_fbc_id(display, fbc->id);
> > +}
> 
> Ditto.
> 
> fbc_sys_cache_enable()?
> 
> > +
> > +static void xe3p_lpd_fbc_update_sys_cache_usage(const struct
> > intel_fbc *fbc,
> > +                                           bool set)
> > +{
> > +   struct intel_display *display = fbc->display;
> > +
> > +   lockdep_assert_held(&fbc->lock);
> > +
> > +   /* system cache for fbc already reserved */
> > +   if (set && !IS_SYS_CACHE_FBC_INSTANCE_NONE(display))
> > +           return;
> > +
> > +   /* cannot clear if "fbc" did not reserve the cache */
> > +   if (!set && !IS_SYS_CACHE_FBC_INSTANCE_EQUALS(display,
> > fbc->id))
> > +           return;
> > +
> > +   if (set)
> > +           xe3p_lpd_fbc_set_sys_cache_usage(fbc);
> > +   else
> > +           xe3p_lpd_fbc_clear_sys_cache_usage(display);
> > +
> > +   drm_dbg_kms(display->drm, "System cacheability usage for
> > FBC[%d] %s\n",
> > +               fbc->id, set ? "configured" : "cleared");
> > +}
> 
> Most of this function is two separate paths based on the parameter. I
> think it would benefit from actually being two separate functions. So
> why not just merge this with xe3p_lpd_fbc_set_sys_cache_usage() and
> xe3p_lpd_fbc_clear_sys_cache_usage()?

Actually there are three paths to update this register (two separate
path to clear this register)

1. As part of enable FBC - depends on the fbc instance (with fbc mutex
taken). If sys_cache is not reserved by any other instance.

2. As part of disable FBC - depends on the fbc instance to avoid
clearing if this fbc didnt reserve the cache space. (with fbc mutex
taken)

3. Clear this register as part of sanitize call (upon setup_hw_state()
calls upon probe and resume) and intel_fbc_cleanup() upon module
removal. Not depend on the fbc instance.

So something like this make sense?

fbc_program_sys_cache(u32 reg)
{
        intel_de_write(reg)
}

fbc_sys_cache_reset(display)
{
        if (!HAS_FBC_SYS_CACHE())
                return

        fbc_program_sys_cache(0)
        fbc.sys_cache_id = SYS_CACHE_FBC_INSTANCE_NONE
}

fbc_sys_cache_disable(fbc)
{
        if (!HAS_FBC_SYS_CACHE())
                return

        if (fbc.sys_cache_id != fbc->id)
                return;

        fbc_sys_cache_reset(display)
}

fbc_sys_cache_enable(fbc)
{
        if (!HAS_FBC_SYS_CACHE())
                return

        if (fbc.sys_cache_id != SYS_CACHE_FBC_INSTANCE_NONE)
                return;

        fbc_program_sys_cache(val)

        fbc.sys_cache_id = fbc-->id
}

intel_fbc_enable() will call fbc_sys_cache_enable(fbc)

__intel_fbc_disable() will call fbc_sys_cache_disable(fbc)

and 

intel_fbc_cleanup() and intel_fbc_sanitize() will call
fbc_sys_cache_reset(display) as a general cleanup for this register.



> 
> > +
> >  static void __intel_fbc_cleanup_cfb(struct intel_fbc *fbc)
> >  {
> >     if (WARN_ON(intel_fbc_hw_is_active(fbc)))
> > @@ -967,6 +1044,9 @@ void intel_fbc_cleanup(struct intel_display
> > *display)
> >  
> >             kfree(fbc);
> >     }
> > +
> > +   if (HAS_FBC_SYS_CACHE(display))
> > +           xe3p_lpd_fbc_clear_sys_cache_usage(display);
> 
> I don't think this should check for HAS_FBC_SYS_CACHE(). I think
> internally the function being called should check if sys cache has
> been
> set. And make sure it's only set on where it's available.
> 
> >  }
> >  
> >  static bool i8xx_fbc_stride_is_valid(const struct
> > intel_plane_state *plane_state)
> > @@ -1780,6 +1860,9 @@ static void __intel_fbc_disable(struct
> > intel_fbc *fbc)
> >  
> >     __intel_fbc_cleanup_cfb(fbc);
> >  
> > +   if (HAS_FBC_SYS_CACHE(display))
> > +           xe3p_lpd_fbc_update_sys_cache_usage(fbc, false);
> > +
> 
> Ditto. I'm also not sure why some places call the version with
> params,
> and some others the version without params.
> 
> >     /* wa_18038517565 Enable DPFC clock gating after FBC
> > disable */
> >     if (display->platform.dg2 || DISPLAY_VER(display) >= 14)
> >             fbc_compressor_clkgate_disable_wa(fbc, false);
> > @@ -1972,6 +2055,9 @@ static void __intel_fbc_enable(struct
> > intel_atomic_state *state,
> >  
> >     intel_fbc_program_workarounds(fbc);
> >     intel_fbc_program_cfb(fbc);
> > +
> > +   if (HAS_FBC_SYS_CACHE(display))
> > +           xe3p_lpd_fbc_update_sys_cache_usage(fbc, true);
> 
> xe3p_lpd_fbc_update_sys_cache_usage() is the function that should
> check
> for HAS_FBC_SYS_CACHE() in one place.
> 
> Well, maybe it should be renamed fbc_sys_cache_enable().
> 
> >  }
> >  
> >  /**
> > @@ -2212,6 +2298,9 @@ void intel_fbc_init(struct intel_display
> > *display)
> >  
> >     for_each_fbc_id(display, fbc_id)
> >             display->fbc.instances[fbc_id] =
> > intel_fbc_create(display, fbc_id);
> > +
> > +   /* Mark that no FBC instance is using the system cache */
> > +   xe3p_lpd_fbc_set_sys_cache_fbc_id(display,
> > SYS_CACHE_FBC_INSTANCE_NONE);
> >  }
> >  
> >  /**
> > @@ -2231,6 +2320,10 @@ void intel_fbc_sanitize(struct intel_display
> > *display)
> >             if (intel_fbc_hw_is_active(fbc))
> >                     intel_fbc_hw_deactivate(fbc);
> >     }
> > +
> > +   /* Ensure the sys cache usage register gets cleared */
> > +   if (HAS_FBC_SYS_CACHE(display))
> > +           xe3p_lpd_fbc_clear_sys_cache_usage(display);
> 
> Ditto about checking for valid sys cache inside, not
> HAS_FBC_SYS_CACHE().
> 
> >  }
> >  
> >  static int intel_fbc_debugfs_status_show(struct seq_file *m, void
> > *unused)
> > diff --git a/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> > b/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> > index b1d0161a3196..d2d889fa4bed 100644
> > --- a/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> > +++ b/drivers/gpu/drm/i915/display/intel_fbc_regs.h
> > @@ -126,4 +126,14 @@
> >  #define   FBC_REND_NUKE                    REG_BIT(2)
> >  #define   FBC_REND_CACHE_CLEAN             REG_BIT(1)
> >  
> > +#define XE3P_LPD_FBC_SYS_CACHE_USAGE_CFG   _MMIO(0x1344E0)
> > +#define  
> > FBC_SYS_CACHE_START_BASE_MASK               REG_GENMASK(31, 16)
> > +#define  
> > FBC_SYS_CACHE_START_BASE(base)      
> > REG_FIELD_PREP(FBC_SYS_CACHE_START_BASE_MASK,(base))
> > +#define   FBC_SYS_CACHEABLE_RANGE_MASK             REG_GENMASK(15, 4)
> > +#define  
> > FBC_SYS_CACHEABLE_RANGE(range)      
> > REG_FIELD_PREP(FBC_SYS_CACHEABLE_RANGE_MASK,(range))
> > +#define   FBC_SYS_CACHE_TAG_MASK           REG_GENMASK(3, 2)
> > +#define  
> > FBC_SYS_CACHE_TAG_DONT_CACHE                
> > REG_FIELD_PREP(FBC_SYS_CACHE_TAG_MASK,0)
> > +#define  
> > FBC_SYS_CACHE_TAG_USE_RES_SPACE     REG_FIELD_PREP(FBC_SYS_CACHE_TAG_MASK,3)
> > +#define   FBC_SYS_CACHE_READ_ENABLE                REG_BIT(0)
> > +
> >  #endif /* __INTEL_FBC_REGS__ */
> 

Reply via email to