Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Lisovskiy, Stanislav
On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> Each VDSC operates with 1ppc throughput, hence enable the second
> VDSC engine when the mode rate is higher than the current cdclk.
> 
> Signed-off-by: Vandita Kulkarni 
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index 161c33b2c869..55878f65f724 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -70,6 +70,7 @@
>  #include "intel_tc.h"
>  #include "intel_vdsc.h"
>  #include "intel_vrr.h"
> +#include "intel_cdclk.h"
>  
>  #define DP_DPRX_ESI_LEN 14
>  
> @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct 
> intel_dp *intel_dp,
>  struct drm_connector_state *conn_state,
>  struct link_config_limits *limits)
>  {
> + struct intel_cdclk_state *cdclk_state;
>   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
>   struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
>   const struct drm_display_mode *adjusted_mode =
>   &pipe_config->hw.adjusted_mode;
> + struct intel_atomic_state *state =
> + to_intel_atomic_state(pipe_config->uapi.state);
>   int pipe_bpp;
>   int ret;
>  
> @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct 
> intel_dp *intel_dp,
>   }
>   }
>  
> + cdclk_state = intel_atomic_get_cdclk_state(state);
> + if (IS_ERR(cdclk_state))
> + return PTR_ERR(cdclk_state);
> +
>   /*
>* VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
> -  * is greater than the maximum Cdclock and if slice count is even
> +  * is greater than the current Cdclock and if slice count is even
>* then we need to use 2 VDSC instances.
>*/
> - if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> + if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||

So in the end, we didn't have to bump CDCLK up to get rid of that?

Anyways, checked with BSpec 49259, seems to make sense, was no point in
comparing to max CDCLK, which is not even currently used.

Reviewed-by: Stanislav Lisovskiy 

>   pipe_config->bigjoiner) {
>   if (pipe_config->dsc.slice_count < 2) {
>   drm_dbg_kms(&dev_priv->drm,
> -- 
> 2.32.0
> 


Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Ville Syrjälä
On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> Each VDSC operates with 1ppc throughput, hence enable the second
> VDSC engine when the mode rate is higher than the current cdclk.
> 
> Signed-off-by: Vandita Kulkarni 
> ---
>  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
>  1 file changed, 10 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/display/intel_dp.c 
> b/drivers/gpu/drm/i915/display/intel_dp.c
> index 161c33b2c869..55878f65f724 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> @@ -70,6 +70,7 @@
>  #include "intel_tc.h"
>  #include "intel_vdsc.h"
>  #include "intel_vrr.h"
> +#include "intel_cdclk.h"
>  
>  #define DP_DPRX_ESI_LEN 14
>  
> @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct 
> intel_dp *intel_dp,
>  struct drm_connector_state *conn_state,
>  struct link_config_limits *limits)
>  {
> + struct intel_cdclk_state *cdclk_state;
>   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
>   struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
>   const struct drm_display_mode *adjusted_mode =
>   &pipe_config->hw.adjusted_mode;
> + struct intel_atomic_state *state =
> + to_intel_atomic_state(pipe_config->uapi.state);
>   int pipe_bpp;
>   int ret;
>  
> @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct 
> intel_dp *intel_dp,
>   }
>   }
>  
> + cdclk_state = intel_atomic_get_cdclk_state(state);
> + if (IS_ERR(cdclk_state))
> + return PTR_ERR(cdclk_state);
> +
>   /*
>* VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
> -  * is greater than the maximum Cdclock and if slice count is even
> +  * is greater than the current Cdclock and if slice count is even
>* then we need to use 2 VDSC instances.
>*/
> - if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> + if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||

This is wrong. We compute the cdclk based on the requirements of the
mode/etc., not the other way around.

>   pipe_config->bigjoiner) {
>   if (pipe_config->dsc.slice_count < 2) {
>   drm_dbg_kms(&dev_priv->drm,
> -- 
> 2.32.0

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Kulkarni, Vandita
> -Original Message-
> From: Ville Syrjälä 
> Sent: Tuesday, September 14, 2021 12:59 PM
> To: Kulkarni, Vandita 
> Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani ;
> Navare, Manasi D 
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> engine for higher moderates
> 
> On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
> > engine when the mode rate is higher than the current cdclk.
> >
> > Signed-off-by: Vandita Kulkarni 
> > ---
> >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> >  1 file changed, 10 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index 161c33b2c869..55878f65f724 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_tc.h"
> >  #include "intel_vdsc.h"
> >  #include "intel_vrr.h"
> > +#include "intel_cdclk.h"
> >
> >  #define DP_DPRX_ESI_LEN 14
> >
> > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
> intel_dp *intel_dp,
> >struct drm_connector_state *conn_state,
> >struct link_config_limits *limits)  {
> > +   struct intel_cdclk_state *cdclk_state;
> > struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> > struct drm_i915_private *dev_priv = to_i915(dig_port-
> >base.base.dev);
> > const struct drm_display_mode *adjusted_mode =
> > &pipe_config->hw.adjusted_mode;
> > +   struct intel_atomic_state *state =
> > +   to_intel_atomic_state(pipe_config-
> >uapi.state);
> > int pipe_bpp;
> > int ret;
> >
> > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
> intel_dp *intel_dp,
> > }
> > }
> >
> > +   cdclk_state = intel_atomic_get_cdclk_state(state);
> > +   if (IS_ERR(cdclk_state))
> > +   return PTR_ERR(cdclk_state);
> > +
> > /*
> >  * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
> > -* is greater than the maximum Cdclock and if slice count is even
> > +* is greater than the current Cdclock and if slice count is even
> >  * then we need to use 2 VDSC instances.
> >  */
> > -   if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> > +   if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> 
> This is wrong. We compute the cdclk based on the requirements of the
> mode/etc., not the other way around.

Okay, so you suggest that we set the cd clock to max when we have such a
requirement, rather than enabling the second engine?

> 
> > pipe_config->bigjoiner) {
> > if (pipe_config->dsc.slice_count < 2) {
> > drm_dbg_kms(&dev_priv->drm,
> > --
> > 2.32.0
> 
> --
> Ville Syrjälä
> Intel


Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Kulkarni, Vandita
> -Original Message-
> From: Lisovskiy, Stanislav 
> Sent: Tuesday, September 14, 2021 12:49 PM
> To: Kulkarni, Vandita 
> Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani ;
> Navare, Manasi D 
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> engine for higher moderates
> 
> On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
> > engine when the mode rate is higher than the current cdclk.
> >
> > Signed-off-by: Vandita Kulkarni 
> > ---
> >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> >  1 file changed, 10 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > b/drivers/gpu/drm/i915/display/intel_dp.c
> > index 161c33b2c869..55878f65f724 100644
> > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > @@ -70,6 +70,7 @@
> >  #include "intel_tc.h"
> >  #include "intel_vdsc.h"
> >  #include "intel_vrr.h"
> > +#include "intel_cdclk.h"
> >
> >  #define DP_DPRX_ESI_LEN 14
> >
> > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
> intel_dp *intel_dp,
> >struct drm_connector_state *conn_state,
> >struct link_config_limits *limits)  {
> > +   struct intel_cdclk_state *cdclk_state;
> > struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> > struct drm_i915_private *dev_priv = to_i915(dig_port-
> >base.base.dev);
> > const struct drm_display_mode *adjusted_mode =
> > &pipe_config->hw.adjusted_mode;
> > +   struct intel_atomic_state *state =
> > +   to_intel_atomic_state(pipe_config-
> >uapi.state);
> > int pipe_bpp;
> > int ret;
> >
> > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
> intel_dp *intel_dp,
> > }
> > }
> >
> > +   cdclk_state = intel_atomic_get_cdclk_state(state);
> > +   if (IS_ERR(cdclk_state))
> > +   return PTR_ERR(cdclk_state);
> > +
> > /*
> >  * VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
> > -* is greater than the maximum Cdclock and if slice count is even
> > +* is greater than the current Cdclock and if slice count is even
> >  * then we need to use 2 VDSC instances.
> >  */
> > -   if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> > +   if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> 
> So in the end, we didn't have to bump CDCLK up to get rid of that?

The solution that could fix the underruns was either of these:  set max cdclk 
that can drive this or enable the second dsc engine if slice count  > 2  to 
achieve 2ppc.

> 
> Anyways, checked with BSpec 49259, seems to make sense, was no point in
> comparing to max CDCLK, which is not even currently used.
> 
> Reviewed-by: Stanislav Lisovskiy 
> 
> > pipe_config->bigjoiner) {
> > if (pipe_config->dsc.slice_count < 2) {
> > drm_dbg_kms(&dev_priv->drm,
> > --
> > 2.32.0
> >


Re: [Intel-gfx] [PATCH 04/14] drm/hdcp: Expand HDCP helper library for enable/disable/check

2021-09-14 Thread kernel test robot
Hi Sean,

I love your patch! Perhaps something to improve:

[auto build test WARNING on drm-tip/drm-tip]
[also build test WARNING on drm-exynos/exynos-drm-next 
tegra-drm/drm/tegra/for-next linus/master v5.15-rc1 next-20210914]
[cannot apply to drm-intel/for-linux-next drm/drm-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:
https://github.com/0day-ci/linux/commits/Sean-Paul/drm-hdcp-Pull-HDCP-auth-exchange-check-into/20210914-020004
base:   git://anongit.freedesktop.org/drm/drm-tip drm-tip
config: riscv-randconfig-r042-20210913 (attached as .config)
compiler: clang version 14.0.0 (https://github.com/llvm/llvm-project 
261cbe98c38f8c1ee1a482fe7650e790f58a)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install riscv cross compiling tool for clang build
# apt-get install binutils-riscv64-linux-gnu
# 
https://github.com/0day-ci/linux/commit/ceee3075ca23d7911b80eb6a71a0b352d7c6b52c
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review 
Sean-Paul/drm-hdcp-Pull-HDCP-auth-exchange-check-into/20210914-020004
git checkout ceee3075ca23d7911b80eb6a71a0b352d7c6b52c
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=riscv 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All warnings (new ones prefixed by >>):

   In file included from drivers/gpu/drm/drm_hdcp.c:13:
   In file included from include/linux/i2c.h:18:
   In file included from include/linux/regulator/consumer.h:35:
   In file included from include/linux/suspend.h:5:
   In file included from include/linux/swap.h:9:
   In file included from include/linux/memcontrol.h:13:
   In file included from include/linux/cgroup.h:26:
   In file included from include/linux/kernel_stat.h:9:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/riscv/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/riscv/include/asm/io.h:136:
   include/asm-generic/io.h:464:31: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   val = __raw_readb(PCI_IOBASE + addr);
 ~~ ^
   include/asm-generic/io.h:477:61: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   val = __le16_to_cpu((__le16 __force)__raw_readw(PCI_IOBASE + addr));
   ~~ ^
   include/uapi/linux/byteorder/little_endian.h:36:51: note: expanded from 
macro '__le16_to_cpu'
   #define __le16_to_cpu(x) ((__force __u16)(__le16)(x))
 ^
   In file included from drivers/gpu/drm/drm_hdcp.c:13:
   In file included from include/linux/i2c.h:18:
   In file included from include/linux/regulator/consumer.h:35:
   In file included from include/linux/suspend.h:5:
   In file included from include/linux/swap.h:9:
   In file included from include/linux/memcontrol.h:13:
   In file included from include/linux/cgroup.h:26:
   In file included from include/linux/kernel_stat.h:9:
   In file included from include/linux/interrupt.h:11:
   In file included from include/linux/hardirq.h:11:
   In file included from ./arch/riscv/include/generated/asm/hardirq.h:1:
   In file included from include/asm-generic/hardirq.h:17:
   In file included from include/linux/irq.h:20:
   In file included from include/linux/io.h:13:
   In file included from arch/riscv/include/asm/io.h:136:
   include/asm-generic/io.h:490:61: warning: performing pointer arithmetic on a 
null pointer has undefined behavior [-Wnull-pointer-arithmetic]
   val = __le32_to_cpu((__le32 __force)__raw_readl(PCI_IOBASE + addr));
   ~~ ^
   include/uapi/linux/byteorder/little_endian.h:34:51: note: expanded from 
macro '__le32_to_cpu'
   #define __le32_to_cpu(x) ((__force __u32)(__le32)(x))
 ^
   In file included from drivers/gpu/drm/drm_hdcp.c:13:
   In file included from include/linux/i2c.h:18:
   In file included from include/linux/regulator/consumer.h:35:
   In file included from include/linux/suspend.h:5:
   In file included from include/linux/swap.h:9:
   In file included from include/linux/memcontrol.h:13:
  

Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Ville Syrjälä
On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
> > -Original Message-
> > From: Ville Syrjälä 
> > Sent: Tuesday, September 14, 2021 12:59 PM
> > To: Kulkarni, Vandita 
> > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani ;
> > Navare, Manasi D 
> > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> > engine for higher moderates
> > 
> > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> > > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
> > > engine when the mode rate is higher than the current cdclk.
> > >
> > > Signed-off-by: Vandita Kulkarni 
> > > ---
> > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> > >  1 file changed, 10 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > > b/drivers/gpu/drm/i915/display/intel_dp.c
> > > index 161c33b2c869..55878f65f724 100644
> > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > @@ -70,6 +70,7 @@
> > >  #include "intel_tc.h"
> > >  #include "intel_vdsc.h"
> > >  #include "intel_vrr.h"
> > > +#include "intel_cdclk.h"
> > >
> > >  #define DP_DPRX_ESI_LEN 14
> > >
> > > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
> > intel_dp *intel_dp,
> > >  struct drm_connector_state *conn_state,
> > >  struct link_config_limits *limits)  {
> > > + struct intel_cdclk_state *cdclk_state;
> > >   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> > >   struct drm_i915_private *dev_priv = to_i915(dig_port-
> > >base.base.dev);
> > >   const struct drm_display_mode *adjusted_mode =
> > >   &pipe_config->hw.adjusted_mode;
> > > + struct intel_atomic_state *state =
> > > + to_intel_atomic_state(pipe_config-
> > >uapi.state);
> > >   int pipe_bpp;
> > >   int ret;
> > >
> > > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
> > intel_dp *intel_dp,
> > >   }
> > >   }
> > >
> > > + cdclk_state = intel_atomic_get_cdclk_state(state);
> > > + if (IS_ERR(cdclk_state))
> > > + return PTR_ERR(cdclk_state);
> > > +
> > >   /*
> > >* VDSC engine operates at 1 Pixel per clock, so if peak pixel rate
> > > -  * is greater than the maximum Cdclock and if slice count is even
> > > +  * is greater than the current Cdclock and if slice count is even
> > >* then we need to use 2 VDSC instances.
> > >*/
> > > - if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> > > + if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> > 
> > This is wrong. We compute the cdclk based on the requirements of the
> > mode/etc., not the other way around.
> 
> Okay , So you suggest that we set the cd clock to max when we have such 
> requirement, than enabling the second engine?

That seems like the easiest solution. Another option might be to come up
with some lower dotclock limit for the use of the second vdsc. But not
sure we know where the tipping point is wrt. power consumption.

-- 
Ville Syrjälä
Intel


Re: [Intel-gfx] [PATCH 01/16] Revert "drm/i915/display: Disable audio, DRRS and PSR before planes"

2021-09-14 Thread Ville Syrjälä
On Mon, Sep 13, 2021 at 04:28:35PM +, Souza, Jose wrote:
> On Mon, 2021-09-13 at 17:44 +0300, Ville Syrjala wrote:
> > From: Ville Syrjälä 
> > 
> > Disabling planes in the middle of the modeset sequence does not make
> > sense since userspace can anyway disable planes before the modeset
> > even starts. So when the modeset sequence starts the set of enabled
> > planes is entirely arbitrary. Trying to sprinkle the plane disabling
> > into the modeset sequence just means more randomness and potential
> > for hard to reproduce bugs.
> 
> The patch being reverted did not change anything about planes; it only
> disables audio and PSR before the pipe is disabled in this case.

The commit message only talks about planes. Also we already disable
the pipe in the post_disable hook, so PSR/audio was always disabled
before the pipe IIRC.

> I have other pending patch handling cases were userspace still has pipe 
> enabled but no planes enabled.

So we need that I guess rather than hacking around it in the modeset
sequence.

> 
> > 
> > So it makes most sense to just disable all planes first so that the
> > rest of the modeset sequence remains identical regardless of which
> > planes happen to be enabled by userspace at the time.
> 
> This is not what specification ask us to do

The text has always been the same. It just marks the last point at which
the planes must be disabled.

> and for Alderlake-P not following it causes underruns.

Sounds like we have some other bug somewhere then, because supposedly you
get the same underrun if you disable all the planes from userspace
before the modeset then?

> 
> BSpec: 49190
> 
> > 
> > This reverts commit 84030adb9e27d202a66022488bf0349a8bd45213.
> > 
> > Cc: Gwan-gyeong Mun 
> > Cc: José Roberto de Souza 
> > Signed-off-by: Ville Syrjälä 
> > ---
> >  drivers/gpu/drm/i915/display/intel_ddi.c  | 30 +++
> >  drivers/gpu/drm/i915/display/intel_display.c  | 24 ---
> >  .../drm/i915/display/intel_display_types.h|  4 ---
> >  drivers/gpu/drm/i915/display/intel_dp_mst.c   | 14 ++---
> >  4 files changed, 13 insertions(+), 59 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c 
> > b/drivers/gpu/drm/i915/display/intel_ddi.c
> > index 23ef291f7b30..c1fd7cbb40e1 100644
> > --- a/drivers/gpu/drm/i915/display/intel_ddi.c
> > +++ b/drivers/gpu/drm/i915/display/intel_ddi.c
> > @@ -3164,6 +3164,12 @@ static void intel_disable_ddi_dp(struct 
> > intel_atomic_state *state,
> >  
> > intel_dp->link_trained = false;
> >  
> > +   if (old_crtc_state->has_audio)
> > +   intel_audio_codec_disable(encoder,
> > + old_crtc_state, old_conn_state);
> > +
> > +   intel_drrs_disable(intel_dp, old_crtc_state);
> > +   intel_psr_disable(intel_dp, old_crtc_state);
> > intel_edp_backlight_off(old_conn_state);
> > /* Disable the decompression in DP Sink */
> > intel_dp_sink_set_decompression_state(intel_dp, old_crtc_state,
> > @@ -3181,6 +3187,10 @@ static void intel_disable_ddi_hdmi(struct 
> > intel_atomic_state *state,
> > struct drm_i915_private *i915 = to_i915(encoder->base.dev);
> > struct drm_connector *connector = old_conn_state->connector;
> >  
> > +   if (old_crtc_state->has_audio)
> > +   intel_audio_codec_disable(encoder,
> > + old_crtc_state, old_conn_state);
> > +
> > if (!intel_hdmi_handle_sink_scrambling(encoder, connector,
> >false, false))
> > drm_dbg_kms(&i915->drm,
> > @@ -3188,25 +3198,6 @@ static void intel_disable_ddi_hdmi(struct 
> > intel_atomic_state *state,
> > connector->base.id, connector->name);
> >  }
> >  
> > -static void intel_pre_disable_ddi(struct intel_atomic_state *state,
> > - struct intel_encoder *encoder,
> > - const struct intel_crtc_state *old_crtc_state,
> > - const struct drm_connector_state 
> > *old_conn_state)
> > -{
> > -   struct intel_dp *intel_dp;
> > -
> > -   if (old_crtc_state->has_audio)
> > -   intel_audio_codec_disable(encoder, old_crtc_state,
> > - old_conn_state);
> > -
> > -   if (intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_HDMI))
> > -   return;
> > -
> > -   intel_dp = enc_to_intel_dp(encoder);
> > -   intel_drrs_disable(intel_dp, old_crtc_state);
> > -   intel_psr_disable(intel_dp, old_crtc_state);
> > -}
> > -
> >  static void intel_disable_ddi(struct intel_atomic_state *state,
> >   struct intel_encoder *encoder,
> >   const struct intel_crtc_state *old_crtc_state,
> > @@ -4541,7 +4532,6 @@ void intel_ddi_init(struct drm_i915_private 
> > *dev_priv, enum port port)
> > encoder->enable = intel_enable_ddi;
> > encoder->pre_pll_enable = intel_ddi_pre_pll_enable;
> > encoder->pre_enable = int

Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Thomas Hellström
On Tue, 2021-09-14 at 09:40 +0200, Christian König wrote:
> Am 13.09.21 um 14:41 schrieb Thomas Hellström:
> > [SNIP]
> > > > > Let's say you have a struct ttm_object_vram and a struct 
> > > > > ttm_object_gtt, both subclassing drm_gem_object. Then I'd say
> > > > > a 
> > > > > driver would want to subclass those to attach identical data,
> > > > > extend functionality and provide a single i915_gem_object to
> > > > > the 
> > > > > rest of the driver, which couldn't care less whether it's
> > > > > vram or 
> > > > > gtt? Wouldn't you say having separate struct ttm_object_vram
> > > > > and a 
> > > > > struct ttm_object_gtt in this case would be awkward?. We
> > > > > *want* to 
> > > > > allow common handling.
> > > > 
> > > > Yeah, but that's a bad idea. This is like diamond inheritance
> > > > in C++.
> > > > 
> > > > When you need the same functionality in different backends you 
> > > > implement that as separate object and then add a parent class.
> > > > 
> > > > > 
> > > > > It's the exact same situation here. With struct ttm_resource
> > > > > you 
> > > > > let *different* implementation flavours subclass it, which
> > > > > makes it 
> > > > > awkward for the driver to extend the functionality in a
> > > > > common way 
> > > > > by subclassing, unless the driver only uses a single
> > > > > implementation.
> > > > 
> > > > Well the driver should use separate implementations for their 
> > > > different domains as much as possible.
> > > > 
> > > Hmm, Now you lost me a bit. Are you saying that the way we do
> > > dynamic 
> > > backends in the struct ttm_buffer_object to facilitate driver 
> > > subclassing is a bad idea or that the RFC with backpointer is a
> > > bad 
> > > idea?
> > > 
> > > 
> > Or if you mean diamond inheritance is bad, yes that's basically my
> > point.
> 
> That diamond inheritance is a bad idea. What I don't understand is
> why 
> you need that in the first place?
> 
> Information that you attach to a resource are specific to the domain 
> where the resource is allocated from. So why do you want to attach
> the 
> same information to a resources from different domains?

Again, for the same reason that we do that with struct i915_gem_objects
and struct ttm_tts, to extend the functionality. I mean information
that we attach when we subclass a struct ttm_buffer_object doesn't
necessarily care about whether it's a VRAM or a GTT object. In exactly
the same way, information that we want to attach to a struct
ttm_resource doesn't necessarily care whether it's a system or a VRAM
resource, and need not be specific to any of those.

In this particular case, as memory management becomes asynchronous, you
can't attach things like sg-tables and gpu binding information to the
gem object anymore, because the object may have a number of migrations
in the pipeline. Such things need to be attached to the structure that
abstracts the memory allocation, and which may have a completely
different lifetime than the object itself.

In our particular case we want to attach information for cached page
lookup and and sg-table, and moving forward probably the gpu binding
(vma) information, and that is the same information for any
ttm_resource regardless where it's allocated from.

Typical example: A pipelined GPU operation happening before an async
eviction goes wrong. We need to error capture and reset. But if we look
at the object for error capturing, it's already updated pointing to an
after-eviction resource, and the resource sits on a ghost object (or in
the future when ghost objects go away perhaps in limbo somewhere).

We need to capture the memory pointed to by the struct ttm_resource the
GPU was referencing, and to be able to do that we need to cache driver-
specific info on the resource. Typically an sg-list and GPU binding
information. 

Anyway, that cached information needs to be destroyed together with the
resource and thus we need to be able to access that information from
the resource in some way, regardless whether it's a pointer or whether
we embed the struct resource.

I think it's pretty important here that we (using the inheritance
diagram below) recognize the need for D to inherit from A, just like we
do for objects or ttm_tts.


> 
> > 
> > Looking at
> > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FMultiple_inheritance%23%2Fmedia%2FFile%3ADiamond_inheritance.svg&data=04%7C01%7Cchristian.koenig%40amd.com%7Cece4bd8aab644feacc1808d976b3ca56%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637671336950757656%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=LPMnfvC1px0bW8o420vP72oBbkm1v76A%2B0PDUw7urQY%3D&reserved=0
> >  
> > 
> > 
> > 1)
> > 
> > A would be the struct ttm_resource itself,
> > D would be struct i915_resource,
> > B would be struct ttm_range_mgr_node,
> > C would be struct i915_ttm_buddy_resource
> > 
> > And we need to resolve the ambiguity using the awkward union 

Re: [Intel-gfx] [PATCH 08/27] drm/i915: Add logical engine mapping

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 17:50, Matthew Brost wrote:

On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:


On 10/09/2021 20:49, Matthew Brost wrote:

On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:


On 20/08/2021 23:44, Matthew Brost wrote:

Add logical engine mapping. This is required for split-frame, as
workloads need to be placed on engines in a logically contiguous manner.

v2:
(Daniel Vetter)
 - Add kernel doc for new fields

Signed-off-by: Matthew Brost 
---
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 60 ---
drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
.../drm/i915/gt/intel_execlists_submission.c  |  1 +
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|  2 +-
.../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +--
5 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 0d9105a31d84..4d790f9a65dd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -290,7 +290,8 @@ static void nop_irq_handler(struct intel_engine_cs *engine, 
u16 iir)
GEM_DEBUG_WARN_ON(iir);
}
-static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
+static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id,
+ u8 logical_instance)
{
const struct engine_info *info = &intel_engines[id];
struct drm_i915_private *i915 = gt->i915;
@@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt *gt, enum 
intel_engine_id id)
engine->class = info->class;
engine->instance = info->instance;
+   engine->logical_mask = BIT(logical_instance);
__sprint_engine_name(engine);
engine->props.heartbeat_interval_ms =
@@ -572,6 +574,37 @@ static intel_engine_mask_t init_engine_mask(struct 
intel_gt *gt)
return info->engine_mask;
}
+static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids,
+u8 class, const u8 *map, u8 num_instances)
+{
+   int i, j;
+   u8 current_logical_id = 0;
+
+   for (j = 0; j < num_instances; ++j) {
+   for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
+   if (!HAS_ENGINE(gt, i) ||
+   intel_engines[i].class != class)
+   continue;
+
+   if (intel_engines[i].instance == map[j]) {
+   logical_ids[intel_engines[i].instance] =
+   current_logical_id++;
+   break;
+   }
+   }
+   }
+}
+
+static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class)
+{
+   int i;
+   u8 map[MAX_ENGINE_INSTANCE + 1];
+
+   for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
+   map[i] = i;


What's the point of the map array since it is 1:1 with instance?



Future products do not have a 1 to 1 mapping and that mapping can change
based on fusing, e.g. XeHP SDV.

Also technically ICL / TGL / ADL physical instance 2 maps to logical
instance 1.


I don't follow the argument. All I can see is that "map[i] = i" always in
the proposed code, which is then used to check "instance == map[instance]".
So I'd suggest to remove this array from the code until there is a need for
it.



Ok, this logic is slightly confusing and makes more sense once we have
non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default
with the value in map[i] being a physical instance. Populate_logical_ids
searches the map finding all physical instances present in the map
assigning each found instance a new logical id increasing by 1 each
time.

e.g. If the map is setup 0-N and only physical instance 0 / 2 are
present they will get logical mapping 0 / 1 respectively.

This algorithm works for non-standard mappings too /w fused parts. e.g.
on XeHP SDV the map is: { 0, 2, 4, 6, 1, 3, 5, 7 } and if any of the
physical instances can't be found due to fusing the logical mapping is
still correct per the bspec.

This array is absolutely needed for multi-lrc submission to work, even
on ICL / TGL / ADL as the GuC only supports logically contiguous engine
instances.


No idea how can an array fixed at "map[i] = i" be absolutely needed when 
you can just write it like "i". Sometimes it is okay to lay some ground 
work for future platforms but in this case to me it's just obfuscation 
which should be added later, when it is required.



+   populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map));
+}
+
/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command 
Streamers
 * @gt: pointer to struct intel_gt
@@ -583,7 +616,8 @@ int intel_engines_init_mmio(struct intel_gt *gt)
struct drm_i915_private *i915 = gt->i915;
const unsigned int engin

Re: [Intel-gfx] [PATCH v2 0/6] drm/displayid: VESA vendor block and drm/i915 MSO use of it

2021-09-14 Thread Maxime Ripard
Hi,

On Mon, Sep 13, 2021 at 07:45:03PM +0300, Jani Nikula wrote:
> On Tue, 31 Aug 2021, Jani Nikula  wrote:
> > v2 of https://patchwork.freedesktop.org/series/94161/ with the VESA OUI
> > check and an OUI helper patch added.
> 
> Maarten, Maxime, Thomas - may I have an ack for merging this via
> drm-intel? I think at this time we can get the merge to drm-next and
> backmerge to drm-misc fairly quickly, and a topic branch would be
> overkill.

Yep, that works for me

Acked-by: Maxime Ripard 

Maxime


signature.asc
Description: PGP signature


Re: [Intel-gfx] [PATCH 04/27] drm/i915/guc: Take GT PM ref when deregistering context

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 18:12, Matthew Brost wrote:

On Mon, Sep 13, 2021 at 10:55:59AM +0100, Tvrtko Ursulin wrote:


On 20/08/2021 23:44, Matthew Brost wrote:

Taking a PM reference to prevent intel_gt_wait_for_idle from short
circuiting while a deregister context H2G is in flight.

FIXME: Move locking / structure changes into different patch

Signed-off-by: Matthew Brost 
---
   drivers/gpu/drm/i915/gt/intel_context.c   |   2 +
   drivers/gpu/drm/i915/gt/intel_context_types.h |  13 +-
   drivers/gpu/drm/i915/gt/intel_engine_pm.h |   5 +
   drivers/gpu/drm/i915/gt/intel_gt_pm.h |  13 ++
   .../gpu/drm/i915/gt/uc/abi/guc_actions_abi.h  |   1 +
   drivers/gpu/drm/i915/gt/uc/intel_guc.h|  46 ++--
   .../gpu/drm/i915/gt/uc/intel_guc_debugfs.c|  13 +-
   .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 212 +++---
   8 files changed, 199 insertions(+), 106 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
b/drivers/gpu/drm/i915/gt/intel_context.c
index adfe49b53b1b..c8595da64ad8 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -399,6 +399,8 @@ intel_context_init(struct intel_context *ce, struct 
intel_engine_cs *engine)
ce->guc_id.id = GUC_INVALID_LRC_ID;
INIT_LIST_HEAD(&ce->guc_id.link);
+   INIT_LIST_HEAD(&ce->destroyed_link);
+
/*
 * Initialize fence to be complete as this is expected to be complete
 * unless there is a pending schedule disable outstanding.
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
b/drivers/gpu/drm/i915/gt/intel_context_types.h
index 80bbdc7810f6..fd338a30617e 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -190,22 +190,29 @@ struct intel_context {
/**
 * @id: unique handle which is used to communicate information
 * with the GuC about this context, protected by
-* guc->contexts_lock
+* guc->submission_state.lock
 */
u16 id;
/**
 * @ref: the number of references to the guc_id, when
 * transitioning in and out of zero protected by
-* guc->contexts_lock
+* guc->submission_state.lock
 */
atomic_t ref;
/**
 * @link: in guc->guc_id_list when the guc_id has no refs but is
-* still valid, protected by guc->contexts_lock
+* still valid, protected by guc->submission_state.lock
 */
struct list_head link;
} guc_id;
+   /**
+* @destroyed_link: link in guc->submission_state.destroyed_contexts, in
+* list when context is pending to be destroyed (deregistered with the
+* GuC), protected by guc->submission_state.lock
+*/
+   struct list_head destroyed_link;
+
   #ifdef CONFIG_DRM_I915_SELFTEST
/**
 * @drop_schedule_enable: Force drop of schedule enable G2H for selftest
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.h 
b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
index 70ea46d6cfb0..17a5028ea177 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.h
@@ -16,6 +16,11 @@ intel_engine_pm_is_awake(const struct intel_engine_cs 
*engine)
return intel_wakeref_is_active(&engine->wakeref);
   }
+static inline void __intel_engine_pm_get(struct intel_engine_cs *engine)
+{
+   __intel_wakeref_get(&engine->wakeref);
+}
+
   static inline void intel_engine_pm_get(struct intel_engine_cs *engine)
   {
intel_wakeref_get(&engine->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h 
b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
index d0588d8aaa44..a17bf0d4592b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h
@@ -41,6 +41,19 @@ static inline void intel_gt_pm_put_async(struct intel_gt *gt)
intel_wakeref_put_async(>->wakeref);
   }
+#define with_intel_gt_pm(gt, tmp) \
+   for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+intel_gt_pm_put(gt), tmp = 0)
+#define with_intel_gt_pm_async(gt, tmp) \
+   for (tmp = 1, intel_gt_pm_get(gt); tmp; \
+intel_gt_pm_put_async(gt), tmp = 0)
+#define with_intel_gt_pm_if_awake(gt, tmp) \
+   for (tmp = intel_gt_pm_get_if_awake(gt); tmp; \
+intel_gt_pm_put(gt), tmp = 0)
+#define with_intel_gt_pm_if_awake_async(gt, tmp) \
+   for (tmp = intel_gt_pm_get_if_awake(gt); tmp; \
+intel_gt_pm_put_async(gt), tmp = 0)
+
   static inline int intel_gt_pm_wait_for_idle(struct intel_gt *gt)
   {
return intel_wakeref_wait_for_idle(>->wakeref);
diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h 
b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_abi.h
index 8ff58aff..ba10bd374cee 100644
--- a/drivers/gpu/drm/i9

[Intel-gfx] [PATCH v2 1/7] drm/i915/gem: Break out some shmem backend utils

2021-09-14 Thread Matthew Auld
From: Thomas Hellström 

Break out some shmem backend utils for future reuse by the TTM backend:
shmem_alloc_st(), shmem_free_st() and __shmem_writeback() which we can
use to provide a shmem-backed TTM page pool for cached-only TTM
buffer objects.

Main functional change here is that we now compute the page sizes using
the dma segments rather than using the physical page address segments.

v2(Reported-by: kernel test robot )
- Make sure we initialise the mapping on the error path in
  shmem_get_pages()

Signed-off-by: Thomas Hellström 
Reviewed-by: Matthew Auld 
Signed-off-by: Matthew Auld 
---
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 181 +-
 1 file changed, 106 insertions(+), 75 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 11f072193f3b..36b711ae9e28 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,46 +25,61 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
 }
 
-static int shmem_get_pages(struct drm_i915_gem_object *obj)
+static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
 {
-   struct drm_i915_private *i915 = to_i915(obj->base.dev);
-   struct intel_memory_region *mem = obj->mm.region;
-   const unsigned long page_count = obj->base.size / PAGE_SIZE;
+   struct sgt_iter sgt_iter;
+   struct pagevec pvec;
+   struct page *page;
+
+   mapping_clear_unevictable(mapping);
+
+   pagevec_init(&pvec);
+   for_each_sgt_page(page, sgt_iter, st) {
+   if (dirty)
+   set_page_dirty(page);
+
+   if (backup)
+   mark_page_accessed(page);
+
+   if (!pagevec_add(&pvec, page))
+   check_release_pagevec(&pvec);
+   }
+   if (pagevec_count(&pvec))
+   check_release_pagevec(&pvec);
+
+   sg_free_table(st);
+   kfree(st);
+}
+
+static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+  size_t size, struct intel_memory_region 
*mr,
+  struct address_space *mapping,
+  unsigned int max_segment)
+{
+   const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
-   struct address_space *mapping;
struct sg_table *st;
struct scatterlist *sg;
-   struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
-   unsigned int max_segment = i915_sg_segment_size();
-   unsigned int sg_page_sizes;
gfp_t noreclaim;
int ret;
 
-   /*
-* Assert that the object is not currently in any GPU domain. As it
-* wasn't in the GTT, there shouldn't be any way it could have been in
-* a GPU cache
-*/
-   GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
-   GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
-
/*
 * If there's no chance of allocating enough pages for the whole
 * object, bail early.
 */
-   if (obj->base.size > resource_size(&mem->region))
-   return -ENOMEM;
+   if (size > resource_size(&mr->region))
+   return ERR_PTR(-ENOMEM);
 
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (!st)
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
 
-rebuild_st:
if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
kfree(st);
-   return -ENOMEM;
+   return ERR_PTR(-ENOMEM);
}
 
/*
@@ -73,14 +88,12 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
 *
 * Fail silently without starting the shrinker
 */
-   mapping = obj->base.filp->f_mapping;
mapping_set_unevictable(mapping);
noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
 
sg = st->sgl;
st->nents = 0;
-   sg_page_sizes = 0;
for (i = 0; i < page_count; i++) {
const unsigned int shrink[] = {
I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
@@ -135,10 +148,9 @@ static int shmem_get_pages(struct drm_i915_gem_object *obj)
if (!i ||
sg->length >= max_segment ||
page_to_pfn(page) != last_pfn + 1) {
-   if (i) {
-   sg_page_sizes |= sg->length;
+   if (i)
sg = sg_next(sg);
-   }
+
st->nents++;
sg_set_page(sg, page, PAGE_SIZE, 0);
} else {
@@ -149,14 +161,65 @@ static int shmem_get_pages(struct drm_i915_gem_object 
*

[Intel-gfx] [PATCH v2 2/7] drm/ttm: add TTM_PAGE_FLAG_SHMEM

2021-09-14 Thread Matthew Auld
Add new flag to indicate special shmem based tt, which can directly
handle swapping itself, and should be visible to some shrinker.

As part of this we should skip the ttm_pages_allocated accounting, since
such tt objects should already be reachable, and potentially reclaimable
by some shrinker, if under memory pressure, and so shouldn't directly
count towards the swap "watermark" level.

We also need to stop touching the page->mapping and page->index for such
objects, like in ttm_tt_add_mapping, since shmem already uses these.
Some drivers seem to depend on the tt mapping/index behaviour for their
own purposes, so directly using shmem tt likely won't be usable there
as-is.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/ttm/ttm_bo_vm.c |  4 ++--
 drivers/gpu/drm/ttm/ttm_tt.c| 10 +-
 include/drm/ttm/ttm_tt.h|  1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index f56be5bc0861..e2131c73dcb6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -346,8 +346,8 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
} else if (unlikely(!page)) {
break;
}
-   page->index = drm_vma_node_start(&bo->base.vma_node) +
-   page_offset;
+   if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_SHMEM))
+   page->index = 
drm_vma_node_start(&bo->base.vma_node) + page_offset;
pfn = page_to_pfn(page);
}
 
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index dae52433beeb..cc4815c1f505 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -293,7 +293,7 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, 
struct ttm_tt *ttm)
 {
pgoff_t i;
 
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+   if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))
return;
 
for (i = 0; i < ttm->num_pages; ++i)
@@ -311,7 +311,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
 
-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_add(ttm->num_pages,
@@ -349,7 +349,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
return 0;
 
 error:
-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_sub(ttm->num_pages,
@@ -364,7 +364,7 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
pgoff_t i;
struct page **page = ttm->pages;
 
-   if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+   if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))
return;
 
for (i = 0; i < ttm->num_pages; ++i) {
@@ -384,7 +384,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
else
ttm_pool_free(&bdev->pool, ttm);
 
-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_sub(ttm->num_pages,
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 89b15d673b22..20d550185065 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -42,6 +42,7 @@ struct ttm_operation_ctx;
 #define TTM_PAGE_FLAG_ZERO_ALLOC  (1 << 6)
 #define TTM_PAGE_FLAG_SG  (1 << 8)
 #define TTM_PAGE_FLAG_NO_RETRY   (1 << 9)
+#define TTM_PAGE_FLAG_SHMEM  (1 << 10)
 
 #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
 
-- 
2.26.3



[Intel-gfx] [PATCH v2 3/7] drm/i915/ttm: add tt shmem backend

2021-09-14 Thread Matthew Auld
For cached objects we can allocate our pages directly in shmem. This
should make it possible (in a later patch) to utilise the existing
i915-gem shrinker code for such objects. For now this is still disabled.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h |   8 +
 drivers/gpu/drm/i915/gem/i915_gem_shmem.c  |  14 +-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c| 214 ++---
 3 files changed, 206 insertions(+), 30 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 48112b9d76df..561d6bd0a5c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -618,6 +618,14 @@ int i915_gem_object_wait_migration(struct 
drm_i915_gem_object *obj,
 bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
enum intel_memory_type type);
 
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+   size_t size, struct intel_memory_region *mr,
+   struct address_space *mapping,
+   unsigned int max_segment);
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+  bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
index 36b711ae9e28..19e55cc29a15 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -25,8 +25,8 @@ static void check_release_pagevec(struct pagevec *pvec)
cond_resched();
 }
 
-static void shmem_free_st(struct sg_table *st, struct address_space *mapping,
- bool dirty, bool backup)
+void shmem_free_st(struct sg_table *st, struct address_space *mapping,
+  bool dirty, bool backup)
 {
struct sgt_iter sgt_iter;
struct pagevec pvec;
@@ -52,10 +52,10 @@ static void shmem_free_st(struct sg_table *st, struct 
address_space *mapping,
kfree(st);
 }
 
-static struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
-  size_t size, struct intel_memory_region 
*mr,
-  struct address_space *mapping,
-  unsigned int max_segment)
+struct sg_table *shmem_alloc_st(struct drm_i915_private *i915,
+   size_t size, struct intel_memory_region *mr,
+   struct address_space *mapping,
+   unsigned int max_segment)
 {
const unsigned long page_count = size / PAGE_SIZE;
unsigned long i;
@@ -300,7 +300,7 @@ shmem_truncate(struct drm_i915_gem_object *obj)
obj->mm.pages = ERR_PTR(-EFAULT);
 }
 
-static void __shmem_writeback(size_t size, struct address_space *mapping)
+void __shmem_writeback(size_t size, struct address_space *mapping)
 {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index aefaf9293005..e60e538afcd9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -37,6 +37,9 @@
  * @ttm: The base TTM page vector.
  * @dev: The struct device used for dma mapping and unmapping.
  * @cached_st: The cached scatter-gather table.
+ * @obj: The GEM object. Should be valid while we have a valid bo->ttm.
+ * @filp: The shmem file, if using shmem backend.
+ * @backup: Swap out the pages when unpopulating, if using shmem backend.
  *
  * Note that DMA may be going on right up to the point where the page-
  * vector is unpopulated in delayed destroy. Hence keep the
@@ -48,6 +51,9 @@ struct i915_ttm_tt {
struct ttm_tt ttm;
struct device *dev;
struct sg_table *cached_st;
+   struct drm_i915_gem_object *obj;
+   struct file *filp;
+   bool backup;
 };
 
 static const struct ttm_place sys_placement_flags = {
@@ -167,12 +173,105 @@ i915_ttm_placement_from_obj(const struct 
drm_i915_gem_object *obj,
placement->busy_placement = busy;
 }
 
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+   struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+   struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+   struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+   const unsigned int max_segment = i915_sg_segment_size();
+   const size_t size = ttm->num_pa

[Intel-gfx] [PATCH v2 4/7] drm/i915/ttm: use cached system pages when evicting lmem

2021-09-14 Thread Matthew Auld
This should let us do an accelerated copy directly to the shmem pages
when temporarily moving lmem-only objects, where the i915-gem shrinker
can later kick in to swap out the pages, if needed.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index e60e538afcd9..a63beee57210 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -123,11 +123,11 @@ static enum ttm_caching
 i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
 {
/*
-* Objects only allowed in system get cached cpu-mappings.
-* Other objects get WC mapping for now. Even if in system.
+* Objects only allowed in system get cached cpu-mappings, or when
+* evicting lmem-only buffers to system for swapping. Other objects get
+* WC mapping for now. Even if in system.
 */
-   if (obj->mm.region->type == INTEL_MEMORY_SYSTEM &&
-   obj->mm.n_placements <= 1)
+   if (obj->mm.n_placements <= 1)
return ttm_cached;
 
return ttm_write_combined;
-- 
2.26.3



[Intel-gfx] [PATCH v2 5/7] drm/i915: try to simplify make_{un}shrinkable

2021-09-14 Thread Matthew Auld
Drop the atomic shrink_pin stuff, and just have make_{un}shrinkable
update the shrinker visible lists immediately. This at least simplifies
the next patch, and does make the behaviour more obvious. The potential
downside is that make_unshrinkable now grabs a global lock even when the
object itself is no longer shrinkable(transitioning from purgeable <->
shrinkable doesn't seem to be a thing), for example in the ppGTT
insertion paths we should now be careful not to needlessly call
make_unshrinkable multiple times. Outside of that there is some fallout
in intel_context which relies on nesting calls to shrink_pin.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c|  9 
 .../gpu/drm/i915/gem/i915_gem_object_types.h  |  3 +-
 drivers/gpu/drm/i915/gem/i915_gem_pages.c | 16 +-
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c  | 52 +--
 drivers/gpu/drm/i915/gt/gen6_ppgtt.c  |  1 -
 drivers/gpu/drm/i915/gt/gen8_ppgtt.c  |  1 -
 drivers/gpu/drm/i915/gt/intel_context.c   |  9 +---
 7 files changed, 41 insertions(+), 50 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c 
b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 6fb9afb65034..e8265a432fcb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -305,15 +305,6 @@ static void i915_gem_free_object(struct drm_gem_object 
*gem_obj)
 */
atomic_inc(&i915->mm.free_count);
 
-   /*
-* This serializes freeing with the shrinker. Since the free
-* is delayed, first by RCU then by the workqueue, we want the
-* shrinker to be able to free pages of unreferenced objects,
-* or else we may oom whilst there are plenty of deferred
-* freed objects.
-*/
-   i915_gem_object_make_unshrinkable(obj);
-
/*
 * Since we require blocking on struct_mutex to unbind the freed
 * object from the GPU before releasing resources back to the
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 2471f36aaff3..a035ac26a090 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -453,7 +453,6 @@ struct drm_i915_gem_object {
 * instead go through the pin/unpin interfaces.
 */
atomic_t pages_pin_count;
-   atomic_t shrink_pin;
 
/**
 * Priority list of potential placements for this object.
@@ -514,7 +513,7 @@ struct drm_i915_gem_object {
struct i915_gem_object_page_iter get_dma_page;
 
/**
-* Element within i915->mm.unbound_list or i915->mm.bound_list,
+* Element within i915->mm.shrink_list or i915->mm.purge_list,
 * locked by i915->mm.obj_lock.
 */
struct list_head link;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c 
b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 8eb1c3a6fc9c..f0df1394d7f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -64,28 +64,16 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object 
*obj,
GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
i915_gem_object_set_tiling_quirk(obj);
GEM_BUG_ON(!list_empty(&obj->mm.link));
-   atomic_inc(&obj->mm.shrink_pin);
shrinkable = false;
}
 
if (shrinkable) {
-   struct list_head *list;
-   unsigned long flags;
-
assert_object_held(obj);
-   spin_lock_irqsave(&i915->mm.obj_lock, flags);
-
-   i915->mm.shrink_count++;
-   i915->mm.shrink_memory += obj->base.size;
 
if (obj->mm.madv != I915_MADV_WILLNEED)
-   list = &i915->mm.purge_list;
+   i915_gem_object_make_purgeable(obj);
else
-   list = &i915->mm.shrink_list;
-   list_add_tail(&obj->mm.link, list);
-
-   atomic_set(&obj->mm.shrink_pin, 0);
-   spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+   i915_gem_object_make_shrinkable(obj);
}
 }
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index e382b7f2353b..6b38e4414c5a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -455,23 +455,26 @@ void i915_gem_shrinker_taints_mutex(struct 
drm_i915_private *i915,
 
 #define obj_to_i915(obj__) to_i915((obj__)->base.dev)
 
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking(see IS_SHRINKABLE), will 
also
+ * make the

[Intel-gfx] [PATCH v2 6/7] drm/i915/ttm: make evicted shmem pages visible to the shrinker

2021-09-14 Thread Matthew Auld
We currently just evict lmem objects to system memory when under memory
pressure. For this case we lack the usual object mm.pages, which
effectively hides the pages from the i915-gem shrinker, until we
actually "attach" the TT to the object, or in the case of lmem-only
objects it just gets migrated back to lmem when touched again. For such
cases we can make the object visible as soon as we populate the TT with
shmem pages, and then hide it again when doing the unpopulate.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_object.h   |  1 +
 drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 29 +++-
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 11 
 3 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 561d6bd0a5c9..28b831c78c47 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -540,6 +540,7 @@ i915_gem_object_pin_to_display_plane(struct 
drm_i915_gem_object *obj,
 
 void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
 
 static inline bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c 
b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
index 6b38e4414c5a..02175e8ad069 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -482,13 +482,12 @@ void i915_gem_object_make_unshrinkable(struct 
drm_i915_gem_object *obj)
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
-static void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
- struct list_head *head)
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+  struct list_head *head)
 {
struct drm_i915_private *i915 = obj_to_i915(obj);
unsigned long flags;
 
-   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
if (!i915_gem_object_is_shrinkable(obj))
return;
 
@@ -507,6 +506,21 @@ static void __i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj,
spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
 }
 
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+   ___i915_gem_object_make_shrinkable(obj,
+  &obj_to_i915(obj)->mm.shrink_list);
+}
 
 /**
  * i915_gem_object_make_shrinkable - Move the object to the tail of the
@@ -518,8 +532,8 @@ static void __i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj,
  */
 void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
 {
-   __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.shrink_list);
+   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+   __i915_gem_object_make_shrinkable(obj);
 }
 
 /**
@@ -533,6 +547,7 @@ void i915_gem_object_make_shrinkable(struct 
drm_i915_gem_object *obj)
  */
 void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
 {
-   __i915_gem_object_make_shrinkable(obj,
- &obj_to_i915(obj)->mm.purge_list);
+   GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+   ___i915_gem_object_make_shrinkable(obj,
+  &obj_to_i915(obj)->mm.purge_list);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index a63beee57210..f02037a8cebd 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -231,6 +231,15 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device 
*bdev,
if (ttm->page_flags & TTM_PAGE_FLAG_SWAPPED)
ttm->page_flags &= ~TTM_PAGE_FLAG_SWAPPED;
 
+   /*
+* Even if we lack mm.pages for this object(which will be the case when
+* something is evicted to system memory by TTM), we still want to make
+* this object visible to the shrinker, since the underlying ttm_tt
+* still has the real shmem pages. When unpopulating the tt(possibly due
+* to shrinking) we hide it again from the shrinker.
+*/
+   __i915_gem_object_make_shrinkabl

[Intel-gfx] [PATCH v2 7/7] drm/i915/ttm: enable shmem tt backend

2021-09-14 Thread Matthew Auld
Enable shmem tt backend, and enable shrinking.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
---
 drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c 
b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index f02037a8cebd..ed7be8732138 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -1047,6 +1047,7 @@ static u64 i915_ttm_mmap_offset(struct 
drm_i915_gem_object *obj)
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
.name = "i915_gem_object_ttm",
+   .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 
.get_pages = i915_ttm_get_pages,
.put_pages = i915_ttm_put_pages,
-- 
2.26.3



Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Lisovskiy, Stanislav
On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
> > > -Original Message-
> > > From: Ville Syrjälä 
> > > Sent: Tuesday, September 14, 2021 12:59 PM
> > > To: Kulkarni, Vandita 
> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani ;
> > > Navare, Manasi D 
> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> > > engine for higher moderates
> > > 
> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> > > > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
> > > > engine when the mode rate is higher than the current cdclk.
> > > >
> > > > Signed-off-by: Vandita Kulkarni 
> > > > ---
> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > index 161c33b2c869..55878f65f724 100644
> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > > > @@ -70,6 +70,7 @@
> > > >  #include "intel_tc.h"
> > > >  #include "intel_vdsc.h"
> > > >  #include "intel_vrr.h"
> > > > +#include "intel_cdclk.h"
> > > >
> > > >  #define DP_DPRX_ESI_LEN 14
> > > >
> > > > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
> > > intel_dp *intel_dp,
> > > >struct drm_connector_state 
> > > > *conn_state,
> > > >struct link_config_limits 
> > > > *limits)  {
> > > > +   struct intel_cdclk_state *cdclk_state;
> > > > struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> > > > struct drm_i915_private *dev_priv = to_i915(dig_port-
> > > >base.base.dev);
> > > > const struct drm_display_mode *adjusted_mode =
> > > > &pipe_config->hw.adjusted_mode;
> > > > +   struct intel_atomic_state *state =
> > > > +   to_intel_atomic_state(pipe_config-
> > > >uapi.state);
> > > > int pipe_bpp;
> > > > int ret;
> > > >
> > > > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
> > > intel_dp *intel_dp,
> > > > }
> > > > }
> > > >
> > > > +   cdclk_state = intel_atomic_get_cdclk_state(state);
> > > > +   if (IS_ERR(cdclk_state))
> > > > +   return PTR_ERR(cdclk_state);
> > > > +
> > > > /*
> > > >  * VDSC engine operates at 1 Pixel per clock, so if peak pixel 
> > > > rate
> > > > -* is greater than the maximum Cdclock and if slice count is 
> > > > even
> > > > +* is greater than the current Cdclock and if slice count is 
> > > > even
> > > >  * then we need to use 2 VDSC instances.
> > > >  */
> > > > -   if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> > > > +   if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> > > 
> > > This is wrong. We compute the cdclk based on the requirements of the
> > > mode/etc., not the other way around.

According to the BSpec guideline, we decide whether to enable or disable the
second VDSC engine based on that condition. As I understand it, that one is
about the DSC config calculation, based on the CDCLK
which was already calculated.

If we bump up CDCLK, to avoid this, will we even then use a second VDSC ever?

Another thing is that enabling the second VDSC is probably cheaper in terms of
power consumption
than bumping up the CDCLK.

Stan

> > 
> > Okay , So you suggest that we set the cd clock to max when we have such 
> > requirement, than enabling the second engine?
> 
> That seems like the easiest solution. Another option might be to come up
> with some lower dotclock limit for the use of the second vdsc. But not
> sure we know where the tipping point is wrt. power consumption.
> 
> -- 
> Ville Syrjälä
> Intel


Re: [Intel-gfx] [PATCH 0/1] lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-14 Thread Vlastimil Babka
On 9/10/21 16:10, Imran Khan wrote:
> This change is in response to discussion at [1].
> The patch has been created on top of my earlier changes [2] and [3].
> If needed I can resend all of these patches together, though my
> earlier patches have been Acked.

I think you sent those at the beginning of merge window, so it would be best
to gather everything in a self-contained series now and resend. I suggested
another change for one of those anyway.

You can of course resend including the Acks you already got, as you did
already with "[PATCH v2 1/1] lib, stackdepot: Add helper to print stack
entries into buffer."

> [1] https://lore.kernel.org/lkml/e6f6fb85-1d83-425b-9e36-b5784cc9e...@suse.cz/
> [2] https://lore.kernel.org/lkml/fe94ffd8-d235-87d8-9c3d-80f7f73e0...@suse.cz/
> [3] https://lore.kernel.org/lkml/85f4f073-0b5a-9052-0ba9-74d450608...@suse.cz/
> 
> Imran Khan (1):
>   lib, stackdepot: Add helper to print stack entries into buffer.
> 
>  drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +
>  drivers/gpu/drm/drm_mm.c|  5 +
>  drivers/gpu/drm/i915/i915_vma.c |  5 +
>  drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +---
>  include/linux/stackdepot.h  |  3 +++
>  lib/stackdepot.c| 23 +++
>  mm/page_owner.c |  5 +
>  7 files changed, 35 insertions(+), 31 deletions(-)
> 



Re: [Intel-gfx] [PATCH] drm/i915/dp: add a delay before setting panel brightness after power on

2021-09-14 Thread Jani Nikula
On Mon, 13 Sep 2021, Vasily Khoruzhick  wrote:
> Panel in my Dell XPS 7590, that uses Intel's HDR backlight interface to
> control brightness, apparently needs a delay before setting brightness
> after power on. Without this delay the panel does accept the setting
> and may come up with some arbitrary brightness (sometimes it's too dark,
> sometimes it's too bright, I wasn't able to find a system).
>
> I don't have access to the spec, so I'm not sure if it's expected
> behavior or a quirk for particular device.
>
> Delay was chosen by experiment: it works with 100ms, but fails with
> anything lower than 75ms.

Looks like we don't respect the panel delays for DPCD backlight. The
values are used for setting up the panel power sequencer, and thus PWM
based backlight, but we should probably use the delays in DPCD backlight
code too.

BR,
Jani.


>
> Signed-off-by: Vasily Khoruzhick 
> ---
>  drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c 
> b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
> index 4f8337c7fd2e..c4f35e1b5870 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
> +++ b/drivers/gpu/drm/i915/display/intel_dp_aux_backlight.c
> @@ -210,6 +210,10 @@ intel_dp_aux_hdr_enable_backlight(const struct 
> intel_crtc_state *crtc_state,
>  
>   ctrl = old_ctrl;
>   if (panel->backlight.edp.intel.sdr_uses_aux) {
> + /* Wait 100ms to ensure that panel is ready otherwise it may not
> +  * set chosen backlight level
> +  */
> + msleep(100);
>   ctrl |= INTEL_EDP_HDR_TCON_BRIGHTNESS_AUX_ENABLE;
>   intel_dp_aux_hdr_set_aux_backlight(conn_state, level);
>   } else {

-- 
Jani Nikula, Intel Open Source Graphics Center


[Intel-gfx] [PATCH] drm/i915: Remove warning from the rps worker

2021-09-14 Thread Tejas Upadhyay
In commit 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement
for intel_rps_boost()"), we decoupled the rps worker from the pm so
that we could avoid the synchronization penalty, which makes the
assertion liable to run too early. This makes the warning invalid,
hence it is removed.

Fixes: 4e5c8a99e1cb ("drm/i915: Drop i915_request.lock requirement for 
intel_rps_boost()")

Reviewed-by: Chris Wilson 
Signed-off-by: Tejas Upadhyay 
---
 drivers/gpu/drm/i915/gt/intel_rps.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
b/drivers/gpu/drm/i915/gt/intel_rps.c
index e1a198bbd135..172de6c9f949 100644
--- a/drivers/gpu/drm/i915/gt/intel_rps.c
+++ b/drivers/gpu/drm/i915/gt/intel_rps.c
@@ -882,8 +882,6 @@ void intel_rps_park(struct intel_rps *rps)
if (!intel_rps_is_enabled(rps))
return;
 
-   GEM_BUG_ON(atomic_read(&rps->num_waiters));
-
if (!intel_rps_clear_active(rps))
return;
 
-- 
2.31.1



Re: [Intel-gfx] [PATCH 1/4] drm/i915: rename debugfs_gt files

2021-09-14 Thread Jani Nikula
On Wed, 08 Sep 2021, Lucas De Marchi  wrote:
> We shouldn't be using debugfs_ namespace for this functionality. Rename
> debugfs_gt.[ch] to intel_gt_debugfs.[ch] and then make functions,
> defines and structs follow suit.
>
> While at it and since we are renaming the header, sort the includes
> alphabetically.

I didn't do a detailed review, maybe someone should, but superficially
seems good. On the series,

Acked-by: Jani Nikula 

>
> Signed-off-by: Lucas De Marchi 
> ---
>  drivers/gpu/drm/i915/Makefile  |  2 +-
>  drivers/gpu/drm/i915/gt/debugfs_engines.c  |  6 +++---
>  drivers/gpu/drm/i915/gt/debugfs_gt_pm.c| 14 +++---
>  drivers/gpu/drm/i915/gt/intel_gt.c |  6 +++---
>  .../gt/{debugfs_gt.c => intel_gt_debugfs.c}|  8 
>  .../gt/{debugfs_gt.h => intel_gt_debugfs.h}| 14 +++---
>  drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c   | 10 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_debugfs.c | 18 +-
>  .../gpu/drm/i915/gt/uc/intel_guc_log_debugfs.c |  8 
>  drivers/gpu/drm/i915/gt/uc/intel_huc_debugfs.c |  6 +++---
>  drivers/gpu/drm/i915/gt/uc/intel_uc_debugfs.c  |  6 +++---
>  11 files changed, 49 insertions(+), 49 deletions(-)
>  rename drivers/gpu/drm/i915/gt/{debugfs_gt.c => intel_gt_debugfs.c} (87%)
>  rename drivers/gpu/drm/i915/gt/{debugfs_gt.h => intel_gt_debugfs.h} (71%)
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index c36c8a4f0716..3e171f0b5f6a 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -80,7 +80,6 @@ i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>  # "Graphics Technology" (aka we talk to the gpu)
>  gt-y += \
>   gt/debugfs_engines.o \
> - gt/debugfs_gt.o \
>   gt/debugfs_gt_pm.o \
>   gt/gen2_engine_cs.o \
>   gt/gen6_engine_cs.o \
> @@ -101,6 +100,7 @@ gt-y += \
>   gt/intel_gt.o \
>   gt/intel_gt_buffer_pool.o \
>   gt/intel_gt_clock_utils.o \
> + gt/intel_gt_debugfs.o \
>   gt/intel_gt_irq.o \
>   gt/intel_gt_pm.o \
>   gt/intel_gt_pm_irq.o \
> diff --git a/drivers/gpu/drm/i915/gt/debugfs_engines.c 
> b/drivers/gpu/drm/i915/gt/debugfs_engines.c
> index 5e3725e62241..2980dac5b171 100644
> --- a/drivers/gpu/drm/i915/gt/debugfs_engines.c
> +++ b/drivers/gpu/drm/i915/gt/debugfs_engines.c
> @@ -7,9 +7,9 @@
>  #include 
>  
>  #include "debugfs_engines.h"
> -#include "debugfs_gt.h"
>  #include "i915_drv.h" /* for_each_engine! */
>  #include "intel_engine.h"
> +#include "intel_gt_debugfs.h"
>  
>  static int engines_show(struct seq_file *m, void *data)
>  {
> @@ -24,11 +24,11 @@ static int engines_show(struct seq_file *m, void *data)
>  
>   return 0;
>  }
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(engines);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(engines);
>  
>  void debugfs_engines_register(struct intel_gt *gt, struct dentry *root)
>  {
> - static const struct debugfs_gt_file files[] = {
> + static const struct intel_gt_debugfs_file files[] = {
>   { "engines", &engines_fops },
>   };
>  
> diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c 
> b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
> index f6733f279890..9222cf68c56c 100644
> --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
> +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c
> @@ -6,11 +6,11 @@
>  
>  #include 
>  
> -#include "debugfs_gt.h"
>  #include "debugfs_gt_pm.h"
>  #include "i915_drv.h"
>  #include "intel_gt.h"
>  #include "intel_gt_clock_utils.h"
> +#include "intel_gt_debugfs.h"
>  #include "intel_gt_pm.h"
>  #include "intel_llc.h"
>  #include "intel_rc6.h"
> @@ -36,7 +36,7 @@ static int fw_domains_show(struct seq_file *m, void *data)
>  
>   return 0;
>  }
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(fw_domains);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(fw_domains);
>  
>  static void print_rc6_res(struct seq_file *m,
> const char *title,
> @@ -238,7 +238,7 @@ static int drpc_show(struct seq_file *m, void *unused)
>  
>   return err;
>  }
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(drpc);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(drpc);
>  
>  static int frequency_show(struct seq_file *m, void *unused)
>  {
> @@ -480,7 +480,7 @@ static int frequency_show(struct seq_file *m, void 
> *unused)
>  
>   return 0;
>  }
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(frequency);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(frequency);
>  
>  static int llc_show(struct seq_file *m, void *data)
>  {
> @@ -533,7 +533,7 @@ static bool llc_eval(void *data)
>   return HAS_LLC(gt->i915);
>  }
>  
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(llc);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(llc);
>  
>  static const char *rps_power_to_str(unsigned int power)
>  {
> @@ -612,11 +612,11 @@ static bool rps_eval(void *data)
>   return HAS_RPS(gt->i915);
>  }
>  
> -DEFINE_GT_DEBUGFS_ATTRIBUTE(rps_boost);
> +DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
>  
>  void debugfs_gt_pm_register(struct intel_gt *gt, struct dentry *root)
>  {
> - static const struct

[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for kernel/locking: Add context to ww_mutex_trylock. (rev4)

2021-09-14 Thread Patchwork
== Series Details ==

Series: kernel/locking: Add context to ww_mutex_trylock. (rev4)
URL   : https://patchwork.freedesktop.org/series/94437/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
3e5b7ed1e30d kernel/locking: Add context to ww_mutex_trylock.
-:9: WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description 
(prefer a maximum 75 chars per line)
#9: 
> > + * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire 
> > context

-:144: CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files
#144: FILE: include/linux/ww_mutex.h:340:
+extern int __must_check ww_mutex_trylock(struct ww_mutex *lock,

-:217: CHECK:MACRO_ARG_REUSE: Macro argument reuse 'a' - possible side-effects?
#217: FILE: kernel/locking/test-ww_mutex.c:20:
+#define ww_acquire_init_noinject(a, b) do { \
+   ww_acquire_init((a), (b)); \
+   (a)->deadlock_inject_countdown = ~0U; \
+   } while (0)

-:372: WARNING:PREFER_PR_LEVEL: Prefer [subsystem eg: 
netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(...  to 
printk(KERN_INFO ...
#372: FILE: kernel/locking/test-ww_mutex.c:625:
+   printk(KERN_INFO "Beginning ww mutex selftests\n");

-:405: WARNING:PREFER_PR_LEVEL: Prefer [subsystem eg: 
netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(...  to 
printk(KERN_INFO ...
#405: FILE: kernel/locking/test-ww_mutex.c:665:
+   printk(KERN_INFO "All ww mutex selftests passed\n");

-:457: ERROR:MISSING_SIGN_OFF: Missing Signed-off-by: line(s)

total: 1 errors, 3 warnings, 2 checks, 332 lines checked




[Intel-gfx] ✗ Fi.CI.BAT: failure for kernel/locking: Add context to ww_mutex_trylock. (rev4)

2021-09-14 Thread Patchwork
== Series Details ==

Series: kernel/locking: Add context to ww_mutex_trylock. (rev4)
URL   : https://patchwork.freedesktop.org/series/94437/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10579 -> Patchwork_21036


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_21036 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21036, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_21036:

### IGT changes ###

 Possible regressions 

  * igt@i915_module_load@reload:
- fi-icl-u2:  NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-icl-u2/igt@i915_module_l...@reload.html

  * igt@i915_selftest@live@mman:
- fi-cfl-8109u:   NOTRUN -> [INCOMPLETE][2]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-cfl-8109u/igt@i915_selftest@l...@mman.html
- fi-rkl-11600:   NOTRUN -> [INCOMPLETE][3]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-rkl-11600/igt@i915_selftest@l...@mman.html

  
 Warnings 

  * igt@i915_module_load@reload:
- fi-icl-y:   [TIMEOUT][4] -> [INCOMPLETE][5]
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10579/fi-icl-y/igt@i915_module_l...@reload.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-icl-y/igt@i915_module_l...@reload.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_module_load@reload:
- {fi-jsl-1}: [TIMEOUT][6] -> [INCOMPLETE][7]
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10579/fi-jsl-1/igt@i915_module_l...@reload.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-jsl-1/igt@i915_module_l...@reload.html

  * igt@runner@aborted:
- {fi-ehl-2}: NOTRUN -> [FAIL][8]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-ehl-2/igt@run...@aborted.html

  
Known issues


  Here are the changes found in Patchwork_21036 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@runner@aborted:
- fi-rkl-11600:   NOTRUN -> [FAIL][9] ([i915#3928])
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-rkl-11600/igt@run...@aborted.html
- fi-cfl-8109u:   NOTRUN -> [FAIL][10] ([i915#2426] / [i915#3363])
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-cfl-8109u/igt@run...@aborted.html
- fi-icl-u2:  NOTRUN -> [FAIL][11] ([i915#2426] / [i915#3363] / 
[i915#3690])
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-icl-u2/igt@run...@aborted.html

  
 Possible fixes 

  * igt@core_hotunplug@unbind-rebind:
- fi-icl-u2:  [INCOMPLETE][12] -> [PASS][13]
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10579/fi-icl-u2/igt@core_hotunp...@unbind-rebind.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-icl-u2/igt@core_hotunp...@unbind-rebind.html
- fi-rkl-11600:   [INCOMPLETE][14] -> [PASS][15]
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10579/fi-rkl-11600/igt@core_hotunp...@unbind-rebind.html
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-rkl-11600/igt@core_hotunp...@unbind-rebind.html
- fi-cfl-8109u:   [INCOMPLETE][16] -> [PASS][17]
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10579/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21036/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html

  
  {name}: This element is suppressed. This means it is ignored when computing
  the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#2426]: https://gitlab.freedesktop.org/drm/intel/issues/2426
  [i915#3363]: https://gitlab.freedesktop.org/drm/intel/issues/3363
  [i915#3690]: https://gitlab.freedesktop.org/drm/intel/issues/3690
  [i915#3928]: https://gitlab.freedesktop.org/drm/intel/issues/3928


Participating hosts (39 -> 36)
--

  Missing(3): fi-tgl-1115g4 fi-bsw-cyan fi-bdw-samus 


Build changes
-

  * Linux: CI_DRM_10579 -> Patchwork_21036

  CI-20190529: 20190529
  CI_DRM_10579: a83151fa02e8d3e90729db21ee0e3830ff8c9565 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6209: 07d6594ed02f55b68d64fa6dd7f80cfbc1ce4ef8 @ 
https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_21036: 3e5b7ed1e30dd3a8399c11395786546eb4e52ab5 @ 
git://anongit.freedesktop.org

[Intel-gfx] ✗ Fi.CI.CHECKPATCH: warning for series starting with [v2,1/7] drm/i915/gem: Break out some shmem backend utils

2021-09-14 Thread Patchwork
== Series Details ==

Series: series starting with [v2,1/7] drm/i915/gem: Break out some shmem 
backend utils
URL   : https://patchwork.freedesktop.org/series/94648/
State : warning

== Summary ==

$ dim checkpatch origin/drm-tip
d3fcbc14d6ee drm/i915/gem: Break out some shmem backend utils
5e8ae127a95f drm/ttm: add TTM_PAGE_FLAG_SHMEM
56dd3266c291 drm/i915/ttm: add tt shmem backend
89087b82443e drm/i915/ttm: use cached system pages when evicting lmem
3eb4ab240a73 drm/i915: try to simplify make_{un}shrinkable
-:164: CHECK:LINE_SPACING: Please don't use multiple blank lines
#164: FILE: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c:510:
 
+

total: 0 errors, 0 warnings, 1 checks, 194 lines checked
f2013b99cd76 drm/i915/ttm: make evicted shmem pages visible to the shrinker
dba19f313885 drm/i915/ttm: enable shmem tt backend




[Intel-gfx] ✗ Fi.CI.SPARSE: warning for series starting with [v2,1/7] drm/i915/gem: Break out some shmem backend utils

2021-09-14 Thread Patchwork
== Series Details ==

Series: series starting with [v2,1/7] drm/i915/gem: Break out some shmem 
backend utils
URL   : https://patchwork.freedesktop.org/series/94648/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
-
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must be 1 KB"
+./drivers/gpu/drm/amd/amdgpu/../amdgpu/amdgv_sriovmsg.h:312:49: error: static 
assertion failed: "amd_sriov_msg_vf2pf_info must b

[Intel-gfx] ✗ Fi.CI.BAT: failure for series starting with [v2,1/7] drm/i915/gem: Break out some shmem backend utils

2021-09-14 Thread Patchwork
== Series Details ==

Series: series starting with [v2,1/7] drm/i915/gem: Break out some shmem 
backend utils
URL   : https://patchwork.freedesktop.org/series/94648/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10579 -> Patchwork_21037


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_21037 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21037, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_21037:

### IGT changes ###

 Possible regressions 

  * igt@gem_exec_suspend@basic-s3:
- bat-dg1-5:  NOTRUN -> [INCOMPLETE][1]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/bat-dg1-5/igt@gem_exec_susp...@basic-s3.html

  * igt@runner@aborted:
- bat-dg1-5:  NOTRUN -> [FAIL][2]
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/bat-dg1-5/igt@run...@aborted.html

  
Known issues


  Here are the changes found in Patchwork_21037 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@fbdev@info:
- bat-dg1-5:  NOTRUN -> [SKIP][3] ([i915#2582]) +4 similar issues
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/bat-dg1-5/igt@fb...@info.html

  * igt@gem_exec_gttfill@basic:
- bat-dg1-5:  NOTRUN -> [SKIP][4] ([i915#4086])
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/bat-dg1-5/igt@gem_exec_gttf...@basic.html

  
  [i915#2582]: https://gitlab.freedesktop.org/drm/intel/issues/2582
  [i915#4086]: https://gitlab.freedesktop.org/drm/intel/issues/4086


Participating hosts (39 -> 38)
--

  Additional (1): bat-dg1-5 
  Missing(2): fi-bsw-cyan fi-bdw-samus 


Build changes
-

  * Linux: CI_DRM_10579 -> Patchwork_21037

  CI-20190529: 20190529
  CI_DRM_10579: a83151fa02e8d3e90729db21ee0e3830ff8c9565 @ 
git://anongit.freedesktop.org/gfx-ci/linux
  IGT_6209: 07d6594ed02f55b68d64fa6dd7f80cfbc1ce4ef8 @ 
https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_21037: dba19f313885f552e2cdd7df3c3a267618329722 @ 
git://anongit.freedesktop.org/gfx-ci/linux


== Linux commits ==

dba19f313885 drm/i915/ttm: enable shmem tt backend
f2013b99cd76 drm/i915/ttm: make evicted shmem pages visible to the shrinker
3eb4ab240a73 drm/i915: try to simplify make_{un}shrinkable
89087b82443e drm/i915/ttm: use cached system pages when evicting lmem
56dd3266c291 drm/i915/ttm: add tt shmem backend
5e8ae127a95f drm/ttm: add TTM_PAGE_FLAG_SHMEM
d3fcbc14d6ee drm/i915/gem: Break out some shmem backend utils

== Logs ==

For more details see: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21037/index.html


Re: [Intel-gfx] [PATCH 15/26] drm/i915: use the new iterator in i915_request_await_object

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 14:16, Christian König wrote:

Simplifying the code a bit.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/i915_request.c | 36 ++---
  1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c
index 37aef1308573..b81045ceb619 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1583,38 +1583,16 @@ i915_request_await_object(struct i915_request *to,
  struct drm_i915_gem_object *obj,
  bool write)
  {
-   struct dma_fence *excl;
+   struct dma_resv_cursor cursor;
+   struct dma_fence *fence;
int ret = 0;
  
-	if (write) {

-   struct dma_fence **shared;
-   unsigned int count, i;
-
-   ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
- &shared);
-   if (ret)
-   return ret;
-
-   for (i = 0; i < count; i++) {
-   ret = i915_request_await_dma_fence(to, shared[i]);
-   if (ret)
-   break;
-
-   dma_fence_put(shared[i]);
+   dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, write, fence) 
{


I think callers have the object locked for this one. At least if you 
haven't tried it it's worth asking CI (you have the assert already so it 
will tell you). But I think it's important to have an atomic snapshot here.


Regards,

Tvrtko


+   ret = i915_request_await_dma_fence(to, fence);
+   if (ret) {
+   dma_fence_put(fence);
+   break;
}
-
-   for (; i < count; i++)
-   dma_fence_put(shared[i]);
-   kfree(shared);
-   } else {
-   excl = dma_resv_get_excl_unlocked(obj->base.resv);
-   }
-
-   if (excl) {
-   if (ret == 0)
-   ret = i915_request_await_dma_fence(to, excl);
-
-   dma_fence_put(excl);
}
  
  	return ret;




Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Thomas Hellström
On Tue, 2021-09-14 at 10:53 +0200, Christian König wrote:
> Am 14.09.21 um 10:27 schrieb Thomas Hellström:
> > On Tue, 2021-09-14 at 09:40 +0200, Christian König wrote:
> > > Am 13.09.21 um 14:41 schrieb Thomas Hellström:
> > > > [SNIP]
> > > > > > > Let's say you have a struct ttm_object_vram and a struct
> > > > > > > ttm_object_gtt, both subclassing drm_gem_object. Then I'd
> > > > > > > say
> > > > > > > a
> > > > > > > driver would want to subclass those to attach identical
> > > > > > > data,
> > > > > > > extend functionality and provide a single i915_gem_object
> > > > > > > to
> > > > > > > the
> > > > > > > rest of the driver, which couldn't care less whether it's
> > > > > > > vram or
> > > > > > > gtt? Wouldn't you say having separate struct
> > > > > > > ttm_object_vram
> > > > > > > and a
> > > > > > > struct ttm_object_gtt in this case would be awkward?. We
> > > > > > > *want* to
> > > > > > > allow common handling.
> > > > > > Yeah, but that's a bad idea. This is like diamond
> > > > > > inheritance
> > > > > > in C++.
> > > > > > 
> > > > > > When you need the same functionality in different backends
> > > > > > you
> > > > > > implement that as separate object and then add a parent
> > > > > > class.
> > > > > > 
> > > > > > > It's the exact same situation here. With struct
> > > > > > > ttm_resource
> > > > > > > you
> > > > > > > let *different* implementation flavours subclass it,
> > > > > > > which
> > > > > > > makes it
> > > > > > > awkward for the driver to extend the functionality in a
> > > > > > > common way
> > > > > > > by subclassing, unless the driver only uses a single
> > > > > > > implementation.
> > > > > > Well the driver should use separate implementations for
> > > > > > their
> > > > > > different domains as much as possible.
> > > > > > 
> > > > > Hmm, Now you lost me a bit. Are you saying that the way we do
> > > > > dynamic
> > > > > backends in the struct ttm_buffer_object to facilitate driver
> > > > > subclassing is a bad idea or that the RFC with backpointer is
> > > > > a
> > > > > bad
> > > > > idea?
> > > > > 
> > > > > 
> > > > Or if you mean diamond inheritance is bad, yes that's basically
> > > > my
> > > > point.
> > > That diamond inheritance is a bad idea. What I don't understand
> > > is
> > > why
> > > you need that in the first place?
> > > 
> > > Information that you attach to a resource are specific to the
> > > domain
> > > where the resource is allocated from. So why do you want to
> > > attach
> > > the
> > > same information to a resources from different domains?
> > Again, for the same reason that we do that with struct
> > i915_gem_objects
> > and struct ttm_tts, to extend the functionality. I mean information
> > that we attach when we subclass a struct ttm_buffer_object doesn't
> > necessarily care about whether it's a VRAM or a GTT object. In
> > exactly
> > the same way, information that we want to attach to a struct
> > ttm_resource doesn't necessarily care whether it's a system or a
> > VRAM
> > resource, and need not be specific to any of those.
> > 
> > In this particular case, as memory management becomes asynchronous,
> > you
> > can't attach things like sg-tables and gpu binding information to
> > the
> > gem object anymore, because the object may have a number of
> > migrations
> > in the pipeline. Such things need to be attached to the structure
> > that
> > abstracts the memory allocation, and which may have a completely
> > different lifetime than the object itself.
> > 
> > In our particular case we want to attach information for cached
> > page
> > lookup and and sg-table, and moving forward probably the gpu
> > binding
> > (vma) information, and that is the same information for any
> > ttm_resource regardless where it's allocated from.
> > 
> > Typical example: A pipelined GPU operation happening before an
> > async
> > eviction goes wrong. We need to error capture and reset. But if we
> > look
> > at the object for error capturing, it's already updated pointing to
> > an
> > after-eviction resource, and the resource sits on a ghost object
> > (or in
> > the future when ghost objects go away perhaps in limbo somewhere).
> > 
> > We need to capture the memory pointed to by the struct ttm_resource
> > the
> > GPU was referencing, and to be able to do that we need to cache
> > driver-
> > specific info on the resource. Typically an sg-list and GPU binding
> > information.
> > 
> > Anyway, that cached information needs to be destroyed together with
> > the
> > resource and thus we need to be able to access that information
> > from
> > the resource in some way, regardless whether it's a pointer or
> > whether
> > we embed the struct resource.
> > 
> > I think it's pretty important here that we (using the inheritance
> > diagram below) recognize the need for D to inherit from A, just
> > like we
> > do for objects or ttm_tts.
> > 
> > 
> > > > Looking at
> > > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%

Re: [Intel-gfx] [PATCH 01/26] dma-buf: add dma_resv_for_each_fence_unlocked

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 14:16, Christian König wrote:

Abstract the complexity of iterating over all the fences
in a dma_resv object.

The new loop handles the whole RCU and retry dance and
returns only fences where we can be sure we grabbed the
right one.

Signed-off-by: Christian König 
---
  drivers/dma-buf/dma-resv.c | 63 ++
  include/linux/dma-resv.h   | 36 ++
  2 files changed, 99 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..213a9b7251ca 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,69 @@ void dma_resv_add_excl_fence(struct dma_resv *obj, struct 
dma_fence *fence)
  }
  EXPORT_SYMBOL(dma_resv_add_excl_fence);
  
+/**

+ * dma_resv_walk_unlocked - walk over fences in a dma_resv obj
+ * @obj: the dma_resv object
+ * @cursor: cursor to record the current position
+ * @all_fences: true returns also the shared fences
+ * @first: if we should start over
+ *
+ * Return all the fences in the dma_resv object which are not yet signaled.
+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iterator is started over again.
+ */
+struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj,
+struct dma_resv_cursor *cursor,
+bool all_fences, bool first)
+{
+   struct dma_fence *fence = NULL;
+
+   do {
+   /* Drop the reference from the previous round */
+   dma_fence_put(fence);
+
+   cursor->is_first = first;
+   if (first) {
+   cursor->seq = read_seqcount_begin(&obj->seq);
+   cursor->index = -1;
+   cursor->fences = dma_resv_shared_list(obj);
+   cursor->is_exclusive = true;
+
+   fence = dma_resv_excl_fence(obj);
+   if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &fence->flags))
+   fence = NULL;
+   } else {
+   fence = NULL;
+   }
+
+   if (fence) {
+   fence = dma_fence_get_rcu(fence);
+   } else if (all_fences && cursor->fences) {
+   struct dma_resv_list *fences = cursor->fences;


If the RCU lock is allowed to be dropped while walking the list, what 
guarantees that the list of fences hasn't been freed?


Like:

1st call
  -> gets seqcount
  -> stores cursor->fences

rcu lock dropped/re-acquired

2nd call
  -> dereferences into cursor->fences -> boom?


+
+   cursor->is_exclusive = false;
+   while (++cursor->index < fences->shared_count) {
+   fence = rcu_dereference(fences->shared[
+   cursor->index]);
+   if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &fence->flags))
+   break;
+   }
+   if (cursor->index < fences->shared_count)
+   fence = dma_fence_get_rcu(fence);
+   else
+   fence = NULL;
+   }
+
+   /* For the eventually next round */
+   first = true;
+   } while (read_seqcount_retry(&obj->seq, cursor->seq));
+
+   return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked);
+
  /**
   * dma_resv_copy_fences - Copy all fences from src to dst.
   * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..f5b91c292ee0 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,39 @@ struct dma_resv {
struct dma_resv_list __rcu *fence;
  };
  
+/**

+ * struct dma_resv_cursor - current position into the dma_resv fences
+ * @seq: sequence number to check
+ * @index: index into the shared fences
+ * @shared: the shared fences
+ * @is_first: true if this is the first returned fence
+ * @is_exclusive: if the current fence is the exclusive one
+ */
+struct dma_resv_cursor {
+   unsigned int seq;
+   unsigned int index;
+   struct dma_resv_list *fences;
+   bool is_first;


Is is_first useful to callers — that is, are they legitimately allowed to 
look inside what could otherwise be a private object? What is the 
intended use case, given that when true the returned fence can be either 
the exclusive one or the first from a shared list?



+   bool is_exclusive;


Is_exclusive could be written as index == -1 in the code, right? If so 
then an opportunity to remove some redundancy.



+};
+
+/**
+ * dma_resv_for_each_fence_unlocked - fence iterator
+ * @obj: a dma_resv object pointer
+ * @c

Re: [Intel-gfx] [PATCH 15/26] drm/i915: use the new iterator in i915_request_await_object

2021-09-14 Thread Tvrtko Ursulin



On 14/09/2021 11:39, Christian König wrote:

Am 14.09.21 um 12:26 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

Simplifying the code a bit.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/i915_request.c | 36 ++---
  1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c

index 37aef1308573..b81045ceb619 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1583,38 +1583,16 @@ i915_request_await_object(struct i915_request 
*to,

    struct drm_i915_gem_object *obj,
    bool write)
  {
-    struct dma_fence *excl;
+    struct dma_resv_cursor cursor;
+    struct dma_fence *fence;
  int ret = 0;
  -    if (write) {
-    struct dma_fence **shared;
-    unsigned int count, i;
-
-    ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
-  &shared);
-    if (ret)
-    return ret;
-
-    for (i = 0; i < count; i++) {
-    ret = i915_request_await_dma_fence(to, shared[i]);
-    if (ret)
-    break;
-
-    dma_fence_put(shared[i]);
+    dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, write, 
fence) {


I think callers have the object locked for this one. At least if you 
haven't tried it it's worth asking CI (you have the assert already so 
it will tell you). But I think it's important to have an atomic 
snapshot here.


Thanks for the info. In this case I'm just going to use the locked 
variant of the iterator here for the next round.


Could you point me to the place where the lock is grabed/released for 
reference?


There is quite a few callers and I haven't audited all of them. But I 
think, given the function is used for setting up tracking of implicit 
dependencies, that it has to be true.


In the case of execbuf for instance the flow is relatively complicated:

i915_gem_do_execbuffer
  eb_relocate_parse
eb_validate_vmas
  eb_lock_vmas
i915_gem_object_lock
  eb_submit
eb_move_to_gpu
  i915_request_await_object
  i915_gem_ww_ctx_fini
i915_gem_ww_ctx_unlock_all
  i915_gem_object_unlock

Other call sites have simpler flows but there is a lot of them so I 
think using CI is easiest.


Regards,

Tvrtko


Thanks,
Christian.



Regards,

Tvrtko


+    ret = i915_request_await_dma_fence(to, fence);
+    if (ret) {
+    dma_fence_put(fence);
+    break;
  }
-
-    for (; i < count; i++)
-    dma_fence_put(shared[i]);
-    kfree(shared);
-    } else {
-    excl = dma_resv_get_excl_unlocked(obj->base.resv);
-    }
-
-    if (excl) {
-    if (ret == 0)
-    ret = i915_request_await_dma_fence(to, excl);
-
-    dma_fence_put(excl);
  }
    return ret;





[Intel-gfx] ✗ Fi.CI.BAT: failure for drm/i915: Remove warning from the rps worker

2021-09-14 Thread Patchwork
== Series Details ==

Series: drm/i915: Remove warning from the rps worker
URL   : https://patchwork.freedesktop.org/series/94650/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10580 -> Patchwork_21038


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_21038 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21038, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_21038:

### IGT changes ###

 Possible regressions 

  * igt@core_hotunplug@unbind-rebind:
- fi-skl-6700k2:  [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10580/fi-skl-6700k2/igt@core_hotunp...@unbind-rebind.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-skl-6700k2/igt@core_hotunp...@unbind-rebind.html
- fi-tgl-1115g4:  NOTRUN -> [INCOMPLETE][3]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@core_hotunp...@unbind-rebind.html
- fi-cfl-8700k:   [PASS][4] -> [INCOMPLETE][5]
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10580/fi-cfl-8700k/igt@core_hotunp...@unbind-rebind.html
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-cfl-8700k/igt@core_hotunp...@unbind-rebind.html
- fi-skl-guc: [PASS][6] -> [INCOMPLETE][7]
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10580/fi-skl-guc/igt@core_hotunp...@unbind-rebind.html
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-skl-guc/igt@core_hotunp...@unbind-rebind.html

  * igt@i915_selftest@live@mman:
- fi-cfl-8109u:   NOTRUN -> [INCOMPLETE][8]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-cfl-8109u/igt@i915_selftest@l...@mman.html

  
 Warnings 

  * igt@i915_module_load@reload:
- fi-icl-y:   [TIMEOUT][9] -> [INCOMPLETE][10]
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10580/fi-icl-y/igt@i915_module_l...@reload.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-icl-y/igt@i915_module_l...@reload.html

  
 Suppressed 

  The following results come from untrusted machines, tests, or statuses.
  They do not affect the overall result.

  * igt@i915_module_load@reload:
- {fi-jsl-1}: [TIMEOUT][11] -> [INCOMPLETE][12]
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10580/fi-jsl-1/igt@i915_module_l...@reload.html
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-jsl-1/igt@i915_module_l...@reload.html

  
Known issues


  Here are the changes found in Patchwork_21038 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_huc_copy@huc-copy:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][13] ([i915#2190])
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@gem_huc_c...@huc-copy.html

  * igt@i915_pm_backlight@basic-brightness:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][14] ([i915#1155])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@i915_pm_backli...@basic-brightness.html

  * igt@i915_selftest@live@mman:
- fi-pnv-d510:NOTRUN -> [INCOMPLETE][15] ([i915#299])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-pnv-d510/igt@i915_selftest@l...@mman.html

  * igt@kms_chamelium@common-hpd-after-suspend:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][16] ([fdo#111827]) +8 similar issues
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@kms_chamel...@common-hpd-after-suspend.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][17] ([i915#4103]) +1 similar issue
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@kms_cursor_leg...@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_force_connector_basic@force-load-detect:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][18] ([fdo#109285])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@kms_force_connector_ba...@force-load-detect.html

  * igt@kms_psr@primary_mmap_gtt:
- fi-tgl-1115g4:  NOTRUN -> [SKIP][19] ([i915#1072]) +3 similar issues
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-tgl-1115g4/igt@kms_psr@primary_mmap_gtt.html

  * igt@prime_vgem@basic-userptr:
- fi-pnv-d510:NOTRUN -> [SKIP][20] ([fdo#109271]) +30 similar issues
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21038/fi-pnv-d510/igt@prime_v...@basic-userptr.html
- fi-

Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Christian König

Am 13.09.21 um 14:41 schrieb Thomas Hellström:

[SNIP]
Let's say you have a struct ttm_object_vram and a struct 
ttm_object_gtt, both subclassing drm_gem_object. Then I'd say a 
driver would want to subclass those to attach identical data, 
extend functionality and provide a single i915_gem_object to the 
rest of the driver, which couldn't care less whether it's vram or 
gtt? Wouldn't you say having separate struct ttm_object_vram and a 
struct ttm_object_gtt in this case would be awkward?. We *want* to 
allow common handling.


Yeah, but that's a bad idea. This is like diamond inheritance in C++.

When you need the same functionality in different backends you 
implement that as separate object and then add a parent class.




It's the exact same situation here. With struct ttm_resource you 
let *different* implementation flavours subclass it, which makes it 
awkward for the driver to extend the functionality in a common way 
by subclassing, unless the driver only uses a single implementation.


Well the driver should use separate implementations for their 
different domains as much as possible.


Hmm, Now you lost me a bit. Are you saying that the way we do dynamic 
backends in the struct ttm_buffer_object to facilitate driver 
subclassing is a bad idea or that the RFC with backpointer is a bad 
idea?




Or if you mean diamond inheritance is bad, yes that's basically my point.


That diamond inheritance is a bad idea. What I don't understand is why 
you need that in the first place?


Information that you attach to a resource are specific to the domain 
where the resource is allocated from. So why do you want to attach the 
same information to a resources from different domains?




Looking at
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FMultiple_inheritance%23%2Fmedia%2FFile%3ADiamond_inheritance.svg&data=04%7C01%7Cchristian.koenig%40amd.com%7Cece4bd8aab644feacc1808d976b3ca56%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637671336950757656%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=LPMnfvC1px0bW8o420vP72oBbkm1v76A%2B0PDUw7urQY%3D&reserved=0 



1)

A would be the struct ttm_resource itself,
D would be struct i915_resource,
B would be struct ttm_range_mgr_node,
C would be struct i915_ttm_buddy_resource

And we need to resolve the ambiguity using the awkward union 
construct, iff we need to derive from both B and C.


Struct ttm_buffer_object and struct ttm_tt instead have B) and C) 
being dynamic backends of A) or a single type derived from A) Hence 
the problem doesn't exist for these types.


So the question from last email remains, if ditching this RFC, can we 
have B) and C) implemented by helpers that can be used from D) and 
that don't derive from A?


Well we already have that in the form of drm_mm. I mean the 
ttm_range_manager is just a relatively small glue code which implements 
the TTMs resource interface using the drm_mm object and a spinlock. IIRC 
that less than 200 lines of code.


So you should already have the necessary helpers and just need to 
implement the resource manager as far as I can see.


I mean I reused the ttm_range_manager_node in for amdgpu_gtt_mgr and 
could potentially reuse a bit more of the ttm_range_manager code. But I 
don't see that as much of an issue, the extra functionality there is 
just minimal.


Regards,
Christian.



Thanks,

Thomas







Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Jani Nikula
On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"  
wrote:
> On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
>> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
>> > > -Original Message-
>> > > From: Ville Syrjälä 
>> > > Sent: Tuesday, September 14, 2021 12:59 PM
>> > > To: Kulkarni, Vandita 
>> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani 
>> > > ;
>> > > Navare, Manasi D 
>> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
>> > > engine for higher moderates
>> > > 
>> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
>> > > > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
>> > > > engine when the mode rate is higher than the current cdclk.
>> > > >
>> > > > Signed-off-by: Vandita Kulkarni 
>> > > > ---
>> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
>> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
>> > > >
>> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
>> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
>> > > > index 161c33b2c869..55878f65f724 100644
>> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
>> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
>> > > > @@ -70,6 +70,7 @@
>> > > >  #include "intel_tc.h"
>> > > >  #include "intel_vdsc.h"
>> > > >  #include "intel_vrr.h"
>> > > > +#include "intel_cdclk.h"
>> > > >
>> > > >  #define DP_DPRX_ESI_LEN 14
>> > > >
>> > > > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
>> > > intel_dp *intel_dp,
>> > > >   struct drm_connector_state 
>> > > > *conn_state,
>> > > >   struct link_config_limits 
>> > > > *limits)  {
>> > > > +  struct intel_cdclk_state *cdclk_state;
>> > > >struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
>> > > >struct drm_i915_private *dev_priv = to_i915(dig_port-
>> > > >base.base.dev);
>> > > >const struct drm_display_mode *adjusted_mode =
>> > > >&pipe_config->hw.adjusted_mode;
>> > > > +  struct intel_atomic_state *state =
>> > > > +  to_intel_atomic_state(pipe_config-
>> > > >uapi.state);
>> > > >int pipe_bpp;
>> > > >int ret;
>> > > >
>> > > > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
>> > > intel_dp *intel_dp,
>> > > >}
>> > > >}
>> > > >
>> > > > +  cdclk_state = intel_atomic_get_cdclk_state(state);
>> > > > +  if (IS_ERR(cdclk_state))
>> > > > +  return PTR_ERR(cdclk_state);
>> > > > +
>> > > >/*
>> > > > * VDSC engine operates at 1 Pixel per clock, so if peak pixel 
>> > > > rate
>> > > > -   * is greater than the maximum Cdclock and if slice count is 
>> > > > even
>> > > > +   * is greater than the current Cdclock and if slice count is 
>> > > > even
>> > > > * then we need to use 2 VDSC instances.
>> > > > */
>> > > > -  if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
>> > > > +  if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
>> > > 
>> > > This is wrong. We compute the cdclk based on the requirements of the
>> > > mode/etc., not the other way around.
>
> According to the BSpec guideline, we decide whether we enable or disable the 
> second VDSC engine based
> on that condition. As I understand it, that one is about DSC config calculation, 
> based on the CDCLK
> which was calculated. 

Point is, at the time compute_config gets called, what guarantees are
there that cdclk_state->actual.cdclk contains anything useful? This is
the design we have.

> If we bump up CDCLK, to avoid this, will we even then use a second VDSC ever?

I think we'll eventually need better logic than unconditionally bumping
to max, and it needs to take *both* the cdclk and the number of dsc
engines into account. The referenced bspec only has the vdsc clock
perspective, not overall perspective.

BR,
Jani.

> Another thing is that enabling the second VDSC is probably cheaper in terms of 
> power consumption
> than bumping up the CDCLK.
>
> Stan
>
>> > 
>> > Okay, so you suggest that we set the cd clock to max when we have such a 
>> > requirement, rather than enabling the second engine?
>> 
>> That seems like the easiest solution. Another option might be to come up
>> with some lower dotclock limit for the use of the second vdsc. But not
>> sure we know where the tipping point is wrt. power consumption.
>> 
>> -- 
>> Ville Syrjälä
>> Intel

-- 
Jani Nikula, Intel Open Source Graphics Center


[Intel-gfx] [PATCH v2 1/1] lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-14 Thread Imran Khan
To print stack entries into a buffer, users of stackdepot,
first get a list of stack entries using stack_depot_fetch
and then print this list into a buffer using stack_trace_snprint.
Provide a helper in stackdepot for this purpose.
Also change above mentioned users to use this helper.

Signed-off-by: Imran Khan 
Suggested-by: Vlastimil Babka 

Acked-by: Vlastimil Babka 
---
 drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +
 drivers/gpu/drm/drm_mm.c|  5 +
 drivers/gpu/drm/i915/i915_vma.c |  5 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +---
 include/linux/stackdepot.h  |  3 +++
 lib/stackdepot.c| 24 
 mm/page_owner.c |  5 +
 7 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c 
b/drivers/gpu/drm/drm_dp_mst_topology.c
index 86d13d6bc463..2d1adab9e360 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct 
drm_dp_mst_topology_ref_history *history,
for (i = 0; i < history->len; i++) {
const struct drm_dp_mst_topology_ref_entry *entry =
&history->entries[i];
-   ulong *entries;
-   uint nr_entries;
u64 ts_nsec = entry->ts_nsec;
u32 rem_nsec = do_div(ts_nsec, 10);
 
-   nr_entries = stack_depot_fetch(entry->backtrace, &entries);
-   stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4);
+   stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4);
 
drm_printf(&p, "  %d %ss (last at %5llu.%06u):\n%s",
   entry->count,
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 93d48a6f04ab..ca04d7f6f7b5 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node)
 static void show_leaks(struct drm_mm *mm)
 {
struct drm_mm_node *node;
-   unsigned long *entries;
-   unsigned int nr_entries;
char *buf;
 
buf = kmalloc(BUFSZ, GFP_KERNEL);
@@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm)
continue;
}
 
-   nr_entries = stack_depot_fetch(node->stack, &entries);
-   stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0);
+   stack_depot_snprint(node->stack, buf, BUFSZ);
DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s",
  node->start, node->size, buf);
}
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4b7fc4647e46..f2d9ed375109 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma)
 
 static void vma_print_allocator(struct i915_vma *vma, const char *reason)
 {
-   unsigned long *entries;
-   unsigned int nr_entries;
char buf[512];
 
if (!vma->node.stack) {
@@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const 
char *reason)
return;
}
 
-   nr_entries = stack_depot_fetch(vma->node.stack, &entries);
-   stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0);
+   stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0);
DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n",
 vma->node.start, vma->node.size, reason, buf);
 }
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c 
b/drivers/gpu/drm/i915/intel_runtime_pm.c
index eaf7688f517d..cc312f0a05eb 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void)
return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN);
 }
 
-static void __print_depot_stack(depot_stack_handle_t stack,
-   char *buf, int sz, int indent)
-{
-   unsigned long *entries;
-   unsigned int nr_entries;
-
-   nr_entries = stack_depot_fetch(stack, &entries);
-   stack_trace_snprint(buf, sz, entries, nr_entries, indent);
-}
-
 static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm)
 {
spin_lock_init(&rpm->debug.lock);
@@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct 
intel_runtime_pm *rpm,
if (!buf)
return;
 
-   __print_depot_stack(stack, buf, PAGE_SIZE, 2);
+   stack_depot_snprint(stack, buf, PAGE_SIZE, 2);
DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf);
 
stack = READ_ONCE(rpm->debug.last_release);
if (stack) {
-   __print_depot_stack(stack, buf, P

Re: [Intel-gfx] [PATCH 15/26] drm/i915: use the new iterator in i915_request_await_object

2021-09-14 Thread Christian König

Am 14.09.21 um 12:26 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

Simplifying the code a bit.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/i915_request.c | 36 ++---
  1 file changed, 7 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_request.c 
b/drivers/gpu/drm/i915/i915_request.c

index 37aef1308573..b81045ceb619 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1583,38 +1583,16 @@ i915_request_await_object(struct i915_request 
*to,

    struct drm_i915_gem_object *obj,
    bool write)
  {
-    struct dma_fence *excl;
+    struct dma_resv_cursor cursor;
+    struct dma_fence *fence;
  int ret = 0;
  -    if (write) {
-    struct dma_fence **shared;
-    unsigned int count, i;
-
-    ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
-  &shared);
-    if (ret)
-    return ret;
-
-    for (i = 0; i < count; i++) {
-    ret = i915_request_await_dma_fence(to, shared[i]);
-    if (ret)
-    break;
-
-    dma_fence_put(shared[i]);
+    dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, write, 
fence) {


I think callers have the object locked for this one. At least if you 
haven't tried it it's worth asking CI (you have the assert already so 
it will tell you). But I think it's important to have an atomic 
snapshot here.


Thanks for the info. In this case I'm just going to use the locked 
variant of the iterator here for the next round.


Could you point me to the place where the lock is grabed/released for 
reference?


Thanks,
Christian.



Regards,

Tvrtko


+    ret = i915_request_await_dma_fence(to, fence);
+    if (ret) {
+    dma_fence_put(fence);
+    break;
  }
-
-    for (; i < count; i++)
-    dma_fence_put(shared[i]);
-    kfree(shared);
-    } else {
-    excl = dma_resv_get_excl_unlocked(obj->base.resv);
-    }
-
-    if (excl) {
-    if (ret == 0)
-    ret = i915_request_await_dma_fence(to, excl);
-
-    dma_fence_put(excl);
  }
    return ret;





Re: [Intel-gfx] [PATCH 1/1] lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-14 Thread imran . f . khan




On 13/9/21 6:51 pm, Vlastimil Babka wrote:

On 9/10/21 16:10, Imran Khan wrote:

To print stack entries into a buffer, users of stackdepot,
first get a list of stack entries using stack_depot_fetch
and then print this list into a buffer using stack_trace_snprint.
Provide a helper in stackdepot for this purpose.
Also change above mentioned users to use this helper.

Signed-off-by: Imran Khan 
Suggested-by: Vlastimil Babka 


Acked-by: Vlastimil Babka 



Thanks for the review.

A comment below:



--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -214,6 +214,29 @@ static inline struct stack_record *find_stack(struct 
stack_record *bucket,
return NULL;
  }


[...]


+ */
+int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size,
+  int spaces)
+{
+   unsigned long *entries;
+   unsigned int nr_entries;
+
+   nr_entries = stack_depot_fetch(handle, &entries);
+   return stack_trace_snprint(buf, size, entries, nr_entries, 0);


stack_trace_snprint() has a WARN_ON(!entries).
So maybe we should not call it if nr_entries is 0 (because e.g. handle was
0) as the warnings are not useful in that case.


Agree. I have addressed this feedback in v2 of patch.


Thanks
-- Imran


Re: [Intel-gfx] [PATCH 01/26] dma-buf: add dma_resv_for_each_fence_unlocked

2021-09-14 Thread Christian König

Am 14.09.21 um 12:53 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

Abstract the complexity of iterating over all the fences
in a dma_resv object.

The new loop handles the whole RCU and retry dance and
returns only fences where we can be sure we grabbed the
right one.

Signed-off-by: Christian König 
---
  drivers/dma-buf/dma-resv.c | 63 ++
  include/linux/dma-resv.h   | 36 ++
  2 files changed, 99 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..213a9b7251ca 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,69 @@ void dma_resv_add_excl_fence(struct dma_resv 
*obj, struct dma_fence *fence)

  }
  EXPORT_SYMBOL(dma_resv_add_excl_fence);
  +/**
+ * dma_resv_walk_unlocked - walk over fences in a dma_resv obj
+ * @obj: the dma_resv object
+ * @cursor: cursor to record the current position
+ * @all_fences: true returns also the shared fences
+ * @first: if we should start over
+ *
+ * Return all the fences in the dma_resv object which are not yet 
signaled.

+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iterator is started 
over again.

+ */
+struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj,
+ struct dma_resv_cursor *cursor,
+ bool all_fences, bool first)
+{
+    struct dma_fence *fence = NULL;
+
+    do {
+    /* Drop the reference from the previous round */
+    dma_fence_put(fence);
+
+    cursor->is_first = first;
+    if (first) {
+    cursor->seq = read_seqcount_begin(&obj->seq);
+    cursor->index = -1;
+    cursor->fences = dma_resv_shared_list(obj);
+    cursor->is_exclusive = true;
+
+    fence = dma_resv_excl_fence(obj);
+    if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+  &fence->flags))
+    fence = NULL;
+    } else {
+    fence = NULL;
+    }
+
+    if (fence) {
+    fence = dma_fence_get_rcu(fence);
+    } else if (all_fences && cursor->fences) {
+    struct dma_resv_list *fences = cursor->fences;


If rcu lock is allowed to be dropped while walking the list what 
guarantees list of fences hasn't been freed?


Ah, good point! We need to test the sequence number when we enter the 
function as well. Going to fix that.




Like:

1st call
  -> gets seqcount
  -> stores cursor->fences

rcu lock dropped/re-acquired

2nd call
  -> dereferences into cursor->fences -> boom?


+
+    cursor->is_exclusive = false;
+    while (++cursor->index < fences->shared_count) {
+    fence = rcu_dereference(fences->shared[
+    cursor->index]);
+    if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+  &fence->flags))
+    break;
+    }
+    if (cursor->index < fences->shared_count)
+    fence = dma_fence_get_rcu(fence);
+    else
+    fence = NULL;
+    }
+
+    /* For the eventually next round */
+    first = true;
+    } while (read_seqcount_retry(&obj->seq, cursor->seq));
+
+    return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked);
+
  /**
   * dma_resv_copy_fences - Copy all fences from src to dst.
   * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..f5b91c292ee0 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,39 @@ struct dma_resv {
  struct dma_resv_list __rcu *fence;
  };
  +/**
+ * struct dma_resv_cursor - current position into the dma_resv fences
+ * @seq: sequence number to check
+ * @index: index into the shared fences
+ * @shared: the shared fences
+ * @is_first: true if this is the first returned fence
+ * @is_exclusive: if the current fence is the exclusive one
+ */
+struct dma_resv_cursor {
+    unsigned int seq;
+    unsigned int index;
+    struct dma_resv_list *fences;
+    bool is_first;


Is_first is useful to callers - like they are legitimately allowed to 
look inside this, what could otherwise be private object?


Yes, I was pondering on the same question. Key point is that this is 
only used by other dma_resv functions which also use cursor.fences for 
example.


So this is only supposed to be used by code working with other privates 
of the dma_resv object as well.


Maybe adding that as a comment?

What is the intended use case, given when true the returned fence can 
be either exclusive or first from a shared list?


To reset counters for and know that the sequence has restarted. See how 
this is used in dma_resv_copy_fences() for example:


    rcu_read_lock();
    dma_resv_for_each_fence_unlocked(dst, &cursor, true, f) {

    if (cursor.is_first) {
   

[Intel-gfx] [PATCH v2 0/1] lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-14 Thread Imran Khan
Changes in v2:
  - Addressed review comment.
  - Added Acked from Vlastimil.
  - Fixed one mistake, due to which stack_trace_snprint was always
getting invoked with 0 as space value.
Changed it to make use of space argument, because users that
are printing stack entries into buffer, may intend to specify
indentation as well.

I also observed build error reported at [4]. This is happening
due to failure in cherry-picking this patch and this happens because
the earlier patches (mentioned in the original cover letter) are not there
in the tree.

Please let me know, if it would be better if I send earlier patches with
this change as well. I can resend those patches keeping the Acked-by and
Reviewed-by tags, obtained in those patches.

[4] 
https://urldefense.com/v3/__https://patchwork.freedesktop.org/series/94553/__;!!ACWV5N9M2RV99hQ!f4tVsbn8O3mxUrGV-8udpN7kFqqQ84bFeSgkqV0aOmatwAJStZV0dkai1xkpsOSDpQ$

Original cover letter
--
This change is in response to discussion at [1].
The patch has been created on top of my earlier changes [2] and [3].
If needed I can resend all of these patches together, though my
earlier patches have been Acked.

[1] https://lore.kernel.org/lkml/e6f6fb85-1d83-425b-9e36-b5784cc9e...@suse.cz/
[2] https://lore.kernel.org/lkml/fe94ffd8-d235-87d8-9c3d-80f7f73e0...@suse.cz/
[3] https://lore.kernel.org/lkml/85f4f073-0b5a-9052-0ba9-74d450608...@suse.cz/

Imran Khan (1):
  lib, stackdepot: Add helper to print stack entries into buffer.

 drivers/gpu/drm/drm_dp_mst_topology.c   |  5 +
 drivers/gpu/drm/drm_mm.c|  5 +
 drivers/gpu/drm/i915/i915_vma.c |  5 +
 drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +---
 include/linux/stackdepot.h  |  3 +++
 lib/stackdepot.c| 24 
 mm/page_owner.c |  5 +
 7 files changed, 36 insertions(+), 31 deletions(-)

-- 
2.30.2



Re: [Intel-gfx] [PATCH 18/26] drm/i915: use new iterator in i915_gem_object_last_write_engine

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 14:16, Christian König wrote:

This is maybe even a fix since the RCU usage here looks incorrect.


What you think is incorrect? Pointless extra rcu locking?

Also, FWIW, I submitted a patch to remove this function altogether since 
its IMO pretty useless, just failed in getting anyone to ack it so far.


Regards,

Tvrtko


Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 +++
  1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index e9eecebf5c9d..3343922af4d6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -500,16 +500,15 @@ static inline struct intel_engine_cs *
  i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
  {
struct intel_engine_cs *engine = NULL;
+   struct dma_resv_cursor cursor;
struct dma_fence *fence;
  
-	rcu_read_lock();

-   fence = dma_resv_get_excl_unlocked(obj->base.resv);
-   rcu_read_unlock();
-
-   if (fence && dma_fence_is_i915(fence) && !dma_fence_is_signaled(fence))
-   engine = to_request(fence)->engine;
-   dma_fence_put(fence);
-
+   dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, false,
+fence) {
+   if (fence && dma_fence_is_i915(fence) &&
+   !dma_fence_is_signaled(fence))
+   engine = to_request(fence)->engine;
+   }
return engine;
  }
  



Re: [Intel-gfx] [PATCH 2/5] drm/i915/display/adlp: Add new PSR2 workarounds

2021-09-14 Thread Gwan-gyeong Mun




On 9/13/21 8:00 PM, Souza, Jose wrote:

On Mon, 2021-09-13 at 19:09 +0300, Gwan-gyeong Mun wrote:


On 9/10/21 7:29 PM, Souza, Jose wrote:

On Fri, 2021-09-10 at 16:38 +0300, Gwan-gyeong Mun wrote:


On 9/10/21 2:07 AM, José Roberto de Souza wrote:

Wa_16014451276 fixes the starting coordinate for PSR2 selective
updates. CHICKEN_TRANS definition of the workaround bit has a wrong
name based on workaround definition and HSD.

Wa_14014971508 allows the screen to continue to be updated when
coming back from DC5/DC6 and SF_SINGLE_FULL_FRAME bit is not kept
set in PSR2_MAN_TRK_CTL.

Wa_16012604467 fixes underruns when exiting PSR2 when it is in one
of its internal states.

Wa_14014971508 is still in pending status in BSpec but by
the time this is reviewed and ready to be merged it will be finalized.

BSpec: 54369
BSpec: 50054
Cc: Gwan-gyeong Mun 
Signed-off-by: José Roberto de Souza 
---
drivers/gpu/drm/i915/display/intel_psr.c | 23 ++-
drivers/gpu/drm/i915/i915_reg.h  |  4 
2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c 
b/drivers/gpu/drm/i915/display/intel_psr.c
index 36816abb3bcc0..92c0b2159559f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -1086,6 +1086,12 @@ static void intel_psr_enable_source(struct intel_dp 
*intel_dp)
intel_de_write(dev_priv, reg, chicken);
}

+/* Wa_16014451276:adlp */
+if (IS_ALDERLAKE_P(dev_priv) &&
+intel_dp->psr.psr2_enabled)
+intel_de_rmw(dev_priv, CHICKEN_TRANS(cpu_transcoder), 0,
+ D13_1_BASED_X_GRANULARITY);

Depending on the capability of the PSR panel, the following setting may
not be necessary, could you add some comments such as "force enable
1-based X granularity on PSR2 VSC SDP"?


It was made sure that all alderlake-P BOM panels will have 1-based X 
granularity, I can add something like that.



+
/*
 * Per Spec: Avoid continuous PSR exit by masking MEMUP and HPD also
 * mask LPSP to avoid dependency on other drivers that might block
@@ -1131,6 +1137,11 @@ static void intel_psr_enable_source(struct intel_dp 
*intel_dp)
 TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
 TRANS_SET_CONTEXT_LATENCY_MASK,
 TRANS_SET_CONTEXT_LATENCY_VALUE(1));
+
+/* Wa_16012604467:adlp */
+if (IS_ALDERLAKE_P(dev_priv) && intel_dp->psr.psr2_enabled)
+intel_de_rmw(dev_priv, CLKGATE_DIS_MISC, 0,
+ CLKGATE_DIS_MISC_DMASC_GATING_DIS);
}

static bool psr_interrupt_error_check(struct intel_dp *intel_dp)
@@ -1320,6 +1331,11 @@ static void intel_psr_disable_locked(struct intel_dp 
*intel_dp)
 TRANS_SET_CONTEXT_LATENCY(intel_dp->psr.transcoder),
 TRANS_SET_CONTEXT_LATENCY_MASK, 0);

+/* Wa_16012604467:adlp */
+if (IS_ALDERLAKE_P(dev_priv) && intel_dp->psr.psr2_enabled)
+intel_de_rmw(dev_priv, CLKGATE_DIS_MISC,
+ CLKGATE_DIS_MISC_DMASC_GATING_DIS, 0);
+
intel_snps_phy_update_psr_power_state(dev_priv, phy, false);

/* Disable PSR on Sink */
@@ -1488,8 +1504,13 @@ static void psr2_man_trk_ctl_calc(struct 
intel_crtc_state *crtc_state,
u32 val = PSR2_MAN_TRK_CTL_ENABLE;

if (full_update) {
+/*
+ * Wa_14014971508:adlp
+ * SINGLE_FULL_FRAME bit is not hold in register so can not be
+ * restored by DMC, so using CONTINUOS_FULL_FRAME to mimic that
+ */
if (IS_ALDERLAKE_P(dev_priv))
-val |= ADLP_PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME;
+val |= ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME;
else
val |= PSR2_MAN_TRK_CTL_SF_SINGLE_FULL_FRAME;

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c2853cc005ee6..0de2f7541da6c 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -8235,6 +8235,7 @@ enum {
#define  VSC_DATA_SEL_SOFTWARE_CONTROLREG_BIT(25) /* GLK */
#define  FECSTALL_DIS_DPTSTREAM_DPTTGREG_BIT(23)
#define  DDI_TRAINING_OVERRIDE_ENABLEREG_BIT(19)
+#define  D13_1_BASED_X_GRANULARITYREG_BIT(18)

The meaning of this macro is to set "force enable 1-based X granularity
on PSR2 VSC SDP" in Display 13.1 ADL, so the meaning of the macro may be
a little ambiguous.


The name of registers are set to match specification name as close as possible 
not the use or meaning.

Yes, just looking at the macro, I thought that it could be interpreted
in two ways: D13 / 1_BASED_X_GRANULARITY or D13_1 / BASED_X_GRANULARITY.
If our macro naming convention is fine in this case, then I don't think
the code is the problem either.


Okay yes someone could interpret into those 2 ways but checking bspec makes it 
clears that it is the first one.
I can rename to ADLP_1_BASED_X_GRANULARITY if you think it would make it better.


the ADLP_1_BASED_X_GRANULARITY you suggested looks better.



#define  DDI_TRAINING_OVERRIDE_VALUEREG_BIT(18)
#define  DDIE_TRAINING_OVERRIDE_ENABLEREG_BIT(17) /* CHICKEN_TRANS_A only */
#define  DDIE_TRAINING_OVERRIDE_VALUEREG_BIT(16) /* CHICKEN_TRANS_A

Re: [Intel-gfx] [PATCH 18/26] drm/i915: use new iterator in i915_gem_object_last_write_engine

2021-09-14 Thread Christian König

Am 14.09.21 um 14:27 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

This is maybe even a fix since the RCU usage here looks incorrect.


What you think is incorrect? Pointless extra rcu locking?


Yeah, exactly that. I also wondered for a second if rcu_read_lock() can 
nest or not. But obviously it either works or lockdep hasn't complained yet.


But I've made a mistake here and at a couple of other places to remove 
too many rcu_read_lock() calls. Thanks for pointing that out, going to 
fix it as well.


Also, FWIW, I submitted a patch to remove this function altogether 
since its IMO pretty useless, just failed in getting anyone to ack it 
so far.


I was on the edge of suggesting that as well since it's only debugfs 
usage looked quite pointless to me.


Feel free to CC me on the patch and you can have my acked-by.

Thanks,
Christian.



Regards,

Tvrtko


Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 +++
  1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h

index e9eecebf5c9d..3343922af4d6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -500,16 +500,15 @@ static inline struct intel_engine_cs *
  i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
  {
  struct intel_engine_cs *engine = NULL;
+    struct dma_resv_cursor cursor;
  struct dma_fence *fence;
  -    rcu_read_lock();
-    fence = dma_resv_get_excl_unlocked(obj->base.resv);
-    rcu_read_unlock();
-
-    if (fence && dma_fence_is_i915(fence) && 
!dma_fence_is_signaled(fence))

-    engine = to_request(fence)->engine;
-    dma_fence_put(fence);
-
+    dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, false,
+ fence) {
+    if (fence && dma_fence_is_i915(fence) &&
+    !dma_fence_is_signaled(fence))
+    engine = to_request(fence)->engine;
+    }
  return engine;
  }





Re: [Intel-gfx] [PATCH 17/26] drm/i915: use new iterator in i915_gem_object_wait_priority

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 14:16, Christian König wrote:

Simplifying the code a bit.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 29 
  1 file changed, 5 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
index 13174541f6c8..e2173a55e527 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -120,31 +120,12 @@ i915_gem_object_wait_priority(struct drm_i915_gem_object 
*obj,
  unsigned int flags,
  const struct i915_sched_attr *attr)
  {
-   struct dma_fence *excl;
-
-   if (flags & I915_WAIT_ALL) {
-   struct dma_fence **shared;
-   unsigned int count, i;
-   int ret;
-
-   ret = dma_resv_get_fences(obj->base.resv, &excl, &count,
- &shared);
-   if (ret)
-   return ret;
-
-   for (i = 0; i < count; i++) {
-   i915_gem_fence_wait_priority(shared[i], attr);
-   dma_fence_put(shared[i]);
-   }
-
-   kfree(shared);
-   } else {
-   excl = dma_resv_get_excl_unlocked(obj->base.resv);
-   }
+   struct dma_resv_cursor cursor;
+   struct dma_fence *fence;
  
-	if (excl) {

-   i915_gem_fence_wait_priority(excl, attr);
-   dma_fence_put(excl);
+   dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor,
+flags & I915_WAIT_ALL, fence) {
+   i915_gem_fence_wait_priority(fence, attr);


Do you know if the RCU lock is actually held here? I tried following the 
KMS code paths but got lost in core helpers and driver vfuncs.


Regards,

Tvrtko


}
return 0;
  }



Re: [Intel-gfx] [PATCH 4/5] drm/i915/display/psr: Use drm damage helpers to calculate plane damaged area

2021-09-14 Thread Gwan-gyeong Mun




On 9/13/21 7:45 PM, Souza, Jose wrote:

On Mon, 2021-09-13 at 19:03 +0300, Gwan-gyeong Mun wrote:


On 9/10/21 2:07 AM, José Roberto de Souza wrote:

drm_atomic_helper_damage_iter_init() + drm_atomic_for_each_plane_damage()
returns the full plane area in case no damaged area was set by
userspace or it was discarded by the driver.

This is important to fix the rendering of userspace applications that
does frontbuffer rendering and notify driver about dirty areas but do
not set any dirty clips.

With this we don't need to worry about to check and mark the whole
area as damaged in page flips.

Another important change here is the move of
drm_atomic_add_affected_planes() call, it needs to called late
otherwise the area of all the planes would be added to pipe_clip and
not saving power.

Cc: Daniel Vetter 
Cc: Gwan-gyeong Mun 
Signed-off-by: José Roberto de Souza 
---
   drivers/gpu/drm/i915/display/intel_psr.c | 37 +---
   1 file changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_psr.c 
b/drivers/gpu/drm/i915/display/intel_psr.c
index 1a3effa3ce709..670b0ceba110f 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -22,6 +22,7 @@
*/

   #include 
+#include 

   #include "display/intel_dp.h"

@@ -1577,10 +1578,6 @@ int intel_psr2_sel_fetch_update(struct 
intel_atomic_state *state,
   if (!crtc_state->enable_psr2_sel_fetch)
   return 0;

-ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
-if (ret)
-return ret;
-
   /*
* Calculate minimal selective fetch area of each plane and calculate
* the pipe damaged area.
@@ -1590,8 +1587,8 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state 
*state,
   for_each_oldnew_intel_plane_in_state(state, plane, old_plane_state,
new_plane_state, i) {
   struct drm_rect src, damaged_area = { .y1 = -1 };
-struct drm_mode_rect *damaged_clips;
-u32 num_clips, j;
+struct drm_atomic_helper_damage_iter iter;
+struct drm_rect clip;

   if (new_plane_state->uapi.crtc != crtc_state->uapi.crtc)
   continue;
@@ -1611,8 +1608,6 @@ int intel_psr2_sel_fetch_update(struct intel_atomic_state 
*state,
   break;
   }

-num_clips = drm_plane_get_damage_clips_count(&new_plane_state->uapi);
-
   /*
* If visibility or plane moved, mark the whole plane area as
* damaged as it needs to be complete redraw in the new and old
@@ -1633,14 +1628,8 @@ int intel_psr2_sel_fetch_update(struct 
intel_atomic_state *state,
   clip_area_update(&pipe_clip, &damaged_area);
   }
   continue;
-} else if (new_plane_state->uapi.alpha != old_plane_state->uapi.alpha ||
-   (!num_clips &&
-new_plane_state->uapi.fb != old_plane_state->uapi.fb)) {
-/*
- * If the plane don't have damaged areas but the
- * framebuffer changed or alpha changed, mark the whole
- * plane area as damaged.
- */
+} else if (new_plane_state->uapi.alpha != old_plane_state->uapi.alpha) {
+/* If alpha changed mark the whole plane area as damaged */
   damaged_area.y1 = new_plane_state->uapi.dst.y1;
   damaged_area.y2 = new_plane_state->uapi.dst.y2;
   clip_area_update(&pipe_clip, &damaged_area);
@@ -1648,15 +1637,11 @@ int intel_psr2_sel_fetch_update(struct 
intel_atomic_state *state,
   }

   drm_rect_fp_to_int(&src, &new_plane_state->uapi.src);
-damaged_clips = drm_plane_get_damage_clips(&new_plane_state->uapi);

-for (j = 0; j < num_clips; j++) {
-struct drm_rect clip;
-
-clip.x1 = damaged_clips[j].x1;
-clip.y1 = damaged_clips[j].y1;
-clip.x2 = damaged_clips[j].x2;
-clip.y2 = damaged_clips[j].y2;
+drm_atomic_helper_damage_iter_init(&iter,
+   &old_plane_state->uapi,
+   &new_plane_state->uapi);

In the description of the drm_atomic_helper_damage_iter_init() function
says, in order to use drm_atomic_helper_damage_iter_init(), the driver
requires that the drm_atomic_helper_check_plane_state() helper function
should be called in advance.
However, in i915, drm_atomic_helper_check_plane_state() helper is not
used, and intel_atomic_plane_check_clipping() handles src.
And i915 is not using the atomic_check callback of
drm_plane_helper_funcs. Is it fine to use
drm_atomic_helper_damage_iter_init() in this case as well?


intel_atomic_plane_check_clipping() does the src rect rotation, scale and 
clipping that drm_atomic_helper_check_plane_state() also does, so we are safe
here.


ok then, the other changes look good to me.
Reviewed-by: Gwan-gyeong Mun 



+drm_atomic_for_each_plane_damage(&iter, &clip) {
   if (drm_rect_intersect(&clip, &src))
   clip_area_update(&damaged_area, &clip);
   }
@@ -1672,6 +1657,10 @@ int intel_psr2_sel_fetch_update(struct 
intel_atomic_state *state,
   if (full_update)
   goto skip_sel_fetch_set_loop;

+ret = drm_atomic_add_affected_planes(&state->base, &crtc->base);
+if (ret)
+return ret;
+
   intel_psr2_sel_fetch_pipe_alignment(crtc_state, &pipe_clip);

   /*





Re: [Intel-gfx] [PATCH v2] kernel/locking: Add context to ww_mutex_trylock.

2021-09-14 Thread Maarten Lankhorst
Op 14-09-2021 om 08:50 schreef Peter Zijlstra:
> On Mon, Sep 13, 2021 at 10:42:36AM +0200, Maarten Lankhorst wrote:
>
>>> +/**
>>> + * ww_mutex_trylock - tries to acquire the w/w mutex with optional acquire 
>>> context
>>> + * @ww: mutex to lock
>>> + * @ww_ctx: optional w/w acquire context
>>> + *
>>> + * Trylocks a mutex with the optional acquire context; no deadlock 
>>> detection is
>>> + * possible. Returns 1 if the mutex has been acquired successfully, 0 
>>> otherwise.
>>> + *
>>> + * Unlike ww_mutex_lock, no deadlock handling is performed. However, if a 
>>> @ctx is
>>> + * specified, -EALREADY handling may happen in calls to ww_mutex_trylock.
>>> + *
>>> + * A mutex acquired with this function must be released with 
>>> ww_mutex_unlock.
>>> + */
>>> +int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
>>> +{
>>> +   if (!ww_ctx)
>>> +   return mutex_trylock(&ww->base);
>>> +
>>> +   MUTEX_WARN_ON(ww->base.magic != &ww->base);
>>> +
>>> +   if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
>>> +   return -EALREADY;
>> I'm not 100% sure this is a good idea, because it would make the
>> trylock weird.  For i915 I checked manually, because I didn't want to
>> change the function signature. This is probably the other extreme.
>>
>> "if (ww_mutex_trylock())" would look correct, but actually be wrong
>> and lead to double unlock without adjustments.  Maybe we could make a
>> ww_mutex_trylock_ctx_err, which would return -EALREADY or -EBUSY on
>> failure, and 0 on success?  We could keep ww_mutex_trylock without
>> ctx, probably just #define as (!ww_mutex_trylock_ctx_err(lock, NULL))
> Urgh, yeah. Also, I suppose that if we already own it, we'll just fail
> the trylock anyway. Let me take this out.
>
>>> +   /*
>>> +* Reset the wounded flag after a kill. No other process can
>>> +* race and wound us here, since they can't have a valid owner
>>> +* pointer if we don't have any locks held.
>>> +*/
>>> +   if (ww_ctx->acquired == 0)
>>> +   ww_ctx->wounded = 0;
>> Yeah I guess this needs fixing too. Not completely sure since trylock
>> wouldn't do the whole ww dance, but since it's our first lock,
>> probably best to do so regardless so other users don't trip over it.
> This is actually critical, because if this trylock is the first lock
> acquisition for the context, there won't be any other opportunity to
> reset this value.
>
>>> +
>>> +   if (__mutex_trylock(&ww->base)) {
>>> +   ww_mutex_set_context_fastpath(ww, ww_ctx);
>>> +   mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, 
>>> _RET_IP_);
>>> +   return 1;
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>>> +EXPORT_SYMBOL(ww_mutex_trylock);
> Updated version below...
>
> ---
> Subject: kernel/locking: Add context to ww_mutex_trylock()
> From: Maarten Lankhorst 
> Date: Thu, 9 Sep 2021 11:32:18 +0200
>
> From: Maarten Lankhorst 
>
> i915 will soon gain an eviction path that trylock a whole lot of locks
> for eviction, getting dmesg failures like below:
>
>   BUG: MAX_LOCK_DEPTH too low!
>   turning off the locking correctness validator.
>   depth: 48  max: 48!
>   48 locks held by i915_selftest/5776:
>#0: 888101a79240 (&dev->mutex){}-{3:3}, at: 
> __driver_attach+0x88/0x160
>#1: c99778c0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: 
> i915_vma_pin.constprop.63+0x39/0x1b0 [i915]
>#2: 88800cf74de8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> i915_vma_pin.constprop.63+0x5f/0x1b0 [i915]
>#3: 88810c7f9e38 (&vm->mutex/1){+.+.}-{3:3}, at: 
> i915_vma_pin_ww+0x1c4/0x9d0 [i915]
>#4: 88810bad5768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> i915_gem_evict_something+0x110/0x860 [i915]
>#5: 88810bad60e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> i915_gem_evict_something+0x110/0x860 [i915]
>   ...
>#46: 88811964d768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> i915_gem_evict_something+0x110/0x860 [i915]
>#47: 88811964e0e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> i915_gem_evict_something+0x110/0x860 [i915]
>   INFO: lockdep is turned off.
>
> Fixing eviction to nest into ww_class_acquire is a high priority, but
> it requires a rework of the entire driver, which can only be done one
> step at a time.
>
> As an intermediate solution, add an acquire context to
> ww_mutex_trylock, which allows us to do proper nesting annotations on
> the trylocks, making the above lockdep splat disappear.
>
> This is also useful in regulator_lock_nested, which may avoid dropping
> regulator_nesting_mutex in the uncontended path, so use it there.
>
> TTM may be another user for this, where we could lock a buffer in a
> fastpath with list locks held, without dropping all locks we hold.
>
> [peterz: rework actual ww_mutex_trylock() implementations]
> Signed-off-by: Maarten Lankhorst 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---

My original patch series with this patch in place still passes i915 selftes

Re: [Intel-gfx] [PATCH 18/26] drm/i915: use new iterator in i915_gem_object_last_write_engine

2021-09-14 Thread Tvrtko Ursulin



On 14/09/2021 13:32, Christian König wrote:

Am 14.09.21 um 14:27 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

This is maybe even a fix since the RCU usage here looks incorrect.


What you think is incorrect? Pointless extra rcu locking?


Yeah, exactly that. I also wondered for a second if rcu_read_lock() can 
nest or not. But obviously it either works or lockdep hasn't complained 
yet.


But I've made a mistake here and at a couple of other places to remove 
too many rcu_read_lock() calls. Thanks for pointing that out, going to 
fix it as well.


Ack.

Also, FWIW, I submitted a patch to remove this function altogether 
since its IMO pretty useless, just failed in getting anyone to ack it 
so far.


I was on the edge of suggesting that as well since it's only debugfs 
usage looked quite pointless to me.


Feel free to CC me on the patch and you can have my acked-by.


Patch is here 
https://patchwork.freedesktop.org/patch/451864/?series=94202&rev=1, thanks!


Regards,

Tvrtko


Thanks,
Christian.



Regards,

Tvrtko


Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/gem/i915_gem_object.h | 15 +++
  1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h 
b/drivers/gpu/drm/i915/gem/i915_gem_object.h

index e9eecebf5c9d..3343922af4d6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -500,16 +500,15 @@ static inline struct intel_engine_cs *
  i915_gem_object_last_write_engine(struct drm_i915_gem_object *obj)
  {
  struct intel_engine_cs *engine = NULL;
+    struct dma_resv_cursor cursor;
  struct dma_fence *fence;
  -    rcu_read_lock();
-    fence = dma_resv_get_excl_unlocked(obj->base.resv);
-    rcu_read_unlock();
-
-    if (fence && dma_fence_is_i915(fence) && 
!dma_fence_is_signaled(fence))

-    engine = to_request(fence)->engine;
-    dma_fence_put(fence);
-
+    dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, false,
+ fence) {
+    if (fence && dma_fence_is_i915(fence) &&
+    !dma_fence_is_signaled(fence))
+    engine = to_request(fence)->engine;
+    }
  return engine;
  }





Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Christian König

Am 14.09.21 um 10:27 schrieb Thomas Hellström:

On Tue, 2021-09-14 at 09:40 +0200, Christian König wrote:

Am 13.09.21 um 14:41 schrieb Thomas Hellström:

[SNIP]

Let's say you have a struct ttm_object_vram and a struct
ttm_object_gtt, both subclassing drm_gem_object. Then I'd say
a
driver would want to subclass those to attach identical data,
extend functionality and provide a single i915_gem_object to
the
rest of the driver, which couldn't care less whether it's
vram or
gtt? Wouldn't you say having separate struct ttm_object_vram
and a
struct ttm_object_gtt in this case would be awkward?. We
*want* to
allow common handling.

Yeah, but that's a bad idea. This is like diamond inheritance
in C++.

When you need the same functionality in different backends you
implement that as separate object and then add a parent class.


It's the exact same situation here. With struct ttm_resource
you
let *different* implementation flavours subclass it, which
makes it
awkward for the driver to extend the functionality in a
common way
by subclassing, unless the driver only uses a single
implementation.

Well the driver should use separate implementations for their
different domains as much as possible.


Hmm, Now you lost me a bit. Are you saying that the way we do
dynamic
backends in the struct ttm_buffer_object to facilitate driver
subclassing is a bad idea or that the RFC with backpointer is a
bad
idea?



Or if you mean diamond inheritance is bad, yes that's basically my
point.

That diamond inheritance is a bad idea. What I don't understand is
why
you need that in the first place?

Information that you attach to a resource are specific to the domain
where the resource is allocated from. So why do you want to attach
the
same information to a resources from different domains?

Again, for the same reason that we do that with struct i915_gem_objects
and struct ttm_tts, to extend the functionality. I mean information
that we attach when we subclass a struct ttm_buffer_object doesn't
necessarily care about whether it's a VRAM or a GTT object. In exactly
the same way, information that we want to attach to a struct
ttm_resource doesn't necessarily care whether it's a system or a VRAM
resource, and need not be specific to any of those.

In this particular case, as memory management becomes asynchronous, you
can't attach things like sg-tables and gpu binding information to the
gem object anymore, because the object may have a number of migrations
in the pipeline. Such things need to be attached to the structure that
abstracts the memory allocation, and which may have a completely
different lifetime than the object itself.

In our particular case we want to attach information for cached page
lookup and an sg-table, and moving forward probably the gpu binding
(vma) information, and that is the same information for any
ttm_resource regardless where it's allocated from.

Typical example: A pipelined GPU operation happening before an async
eviction goes wrong. We need to error capture and reset. But if we look
at the object for error capturing, it's already updated pointing to an
after-eviction resource, and the resource sits on a ghost object (or in
the future when ghost objects go away perhaps in limbo somewhere).

We need to capture the memory pointed to by the struct ttm_resource the
GPU was referencing, and to be able to do that we need to cache driver-
specific info on the resource. Typically an sg-list and GPU binding
information.

Anyway, that cached information needs to be destroyed together with the
resource and thus we need to be able to access that information from
the resource in some way, regardless whether it's a pointer or whether
we embed the struct resource.

I think it's pretty important here that we (using the inheritance
diagram below) recognize the need for D to inherit from A, just like we
do for objects or ttm_tts.



Looking at
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FMultiple_inheritance%23%2Fmedia%2FFile%3ADiamond_inheritance.svg&data=04%7C01%7Cchristian.koenig%40amd.com%7C268bb562db8548b285b408d977598b2c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637672048739103176%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=bPyDqiSF%2FHFZbl74ux0vfwh3uma5hZIUf2xbzb9yZz8%3D&reserved=0
  



1)

A would be the struct ttm_resource itself,
D would be struct i915_resource,
B would be struct ttm_range_mgr_node,
C would be struct i915_ttm_buddy_resource

And we need to resolve the ambiguity using the awkward union
construct, iff we need to derive from both B and C.

Struct ttm_buffer_object and struct ttm_tt instead have B) and C)
being dynamic backends of A) or a single type derived from A) Hence
the problem doesn't exist for these types.

So the question from last email remains, if ditching this RFC, can
we
have B) and C) implemented by helpers that can be used from D) and
that don't derive from A?

Well we alread

Re: [Intel-gfx] [PATCH v2 2/7] drm/ttm: add TTM_PAGE_FLAG_SHMEM

2021-09-14 Thread Christian König

Am 14.09.21 um 10:50 schrieb Matthew Auld:

Add new flag to indicate special shmem based tt, which can directly
handle swapping itself, and should be visible to some shrinker.

As part of this we should skip the ttm_pages_allocated accounting, since
such tt objects should already be reachable, and potentially reclaimable
by some shrinker, if under memory pressure, and so shouldn't directly
count towards the swap "watermark" level.

We also need to stop touching the page->mapping and page->index for such
objects, like in ttm_tt_add_mapping, since shmem already uses these.
Some drivers seem to depend on the tt mapping/index behaviour for their
own purposes, so directly using shmem tt likely won't be usable there
as-is.

Signed-off-by: Matthew Auld 
Cc: Thomas Hellström 
Cc: Christian König 
---
  drivers/gpu/drm/ttm/ttm_bo_vm.c |  4 ++--
  drivers/gpu/drm/ttm/ttm_tt.c| 10 +-
  include/drm/ttm/ttm_tt.h|  1 +
  3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index f56be5bc0861..e2131c73dcb6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -346,8 +346,8 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
} else if (unlikely(!page)) {
break;
}
-   page->index = drm_vma_node_start(&bo->base.vma_node) +
-   page_offset;
+   if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_SHMEM))
+   page->index = 
drm_vma_node_start(&bo->base.vma_node) + page_offset;


I still have a rather bad feeling about that.

This should either not be necessary any more in general or the shmemfile 
approach doesn't work correctly.


Please send a patch to remove this for everybody instead and we will see 
if that really works or not.



pfn = page_to_pfn(page);
}
  
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c

index dae52433beeb..cc4815c1f505 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -293,7 +293,7 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, 
struct ttm_tt *ttm)
  {
pgoff_t i;
  
-	if (ttm->page_flags & TTM_PAGE_FLAG_SG)

+   if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))


Maybe you should re-use the TTM_PAGE_FLAG_SG for this and/or rename the 
flag to better describe what it does.


Something like TTM_PAGE_FLAG_EXTERNAL or similar? The only other use 
case for TTM_PAGE_FLAG_SG which comes to my mind is controlling if the 
pages array is allocated or not.


Christian.


return;
  
  	for (i = 0; i < ttm->num_pages; ++i)

@@ -311,7 +311,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
if (ttm_tt_is_populated(ttm))
return 0;
  
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {

+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_add(ttm->num_pages,
@@ -349,7 +349,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
return 0;
  
  error:

-   if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_sub(ttm->num_pages,
@@ -364,7 +364,7 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
pgoff_t i;
struct page **page = ttm->pages;
  
-	if (ttm->page_flags & TTM_PAGE_FLAG_SG)

+   if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))
return;
  
  	for (i = 0; i < ttm->num_pages; ++i) {

@@ -384,7 +384,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
ttm_tt *ttm)
else
ttm_pool_free(&bdev->pool, ttm);
  
-	if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {

+   if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
if (bdev->pool.use_dma32)
atomic_long_sub(ttm->num_pages,
diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h
index 89b15d673b22..20d550185065 100644
--- a/include/drm/ttm/ttm_tt.h
+++ b/include/drm/ttm/ttm_tt.h
@@ -42,6 +42,7 @@ struct ttm_operation_ctx;
  #define TTM_PAGE_FLAG_ZERO_ALLOC  (1 << 6)
  #define TTM_PAGE_FLAG_SG  (1 << 8)
  #define TTM_PAGE_FLAG_NO_RETRY  (1 << 9)
+#define TTM_PAGE_FLAG_SHMEM  (1 << 10)
  
  #define TTM_PAGE_FLAG_PRIV_POPULATED  (1 << 31)
  




Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Lisovskiy, Stanislav
On Tue, Sep 14, 2021 at 03:04:11PM +0300, Jani Nikula wrote:
> On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"  
> wrote:
> > On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
> >> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
> >> > > -Original Message-
> >> > > From: Ville Syrjälä 
> >> > > Sent: Tuesday, September 14, 2021 12:59 PM
> >> > > To: Kulkarni, Vandita 
> >> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani 
> >> > > ;
> >> > > Navare, Manasi D 
> >> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> >> > > engine for higher moderates
> >> > > 
> >> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> >> > > > Each VDSC operates with 1ppc throughput, hence enable the second VDSC
> >> > > > engine when the mode rate is higher than the current cdclk.
> >> > > >
> >> > > > Signed-off-by: Vandita Kulkarni 
> >> > > > ---
> >> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> >> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
> >> > > >
> >> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> >> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
> >> > > > index 161c33b2c869..55878f65f724 100644
> >> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> >> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> >> > > > @@ -70,6 +70,7 @@
> >> > > >  #include "intel_tc.h"
> >> > > >  #include "intel_vdsc.h"
> >> > > >  #include "intel_vrr.h"
> >> > > > +#include "intel_cdclk.h"
> >> > > >
> >> > > >  #define DP_DPRX_ESI_LEN 14
> >> > > >
> >> > > > @@ -1291,10 +1292,13 @@ static int intel_dp_dsc_compute_config(struct
> >> > > intel_dp *intel_dp,
> >> > > > struct drm_connector_state 
> >> > > > *conn_state,
> >> > > > struct link_config_limits 
> >> > > > *limits)  {
> >> > > > +struct intel_cdclk_state *cdclk_state;
> >> > > >  struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> >> > > >  struct drm_i915_private *dev_priv = to_i915(dig_port-
> >> > > >base.base.dev);
> >> > > >  const struct drm_display_mode *adjusted_mode =
> >> > > >  &pipe_config->hw.adjusted_mode;
> >> > > > +struct intel_atomic_state *state =
> >> > > > +to_intel_atomic_state(pipe_config-
> >> > > >uapi.state);
> >> > > >  int pipe_bpp;
> >> > > >  int ret;
> >> > > >
> >> > > > @@ -1373,12 +1377,16 @@ static int intel_dp_dsc_compute_config(struct
> >> > > intel_dp *intel_dp,
> >> > > >  }
> >> > > >  }
> >> > > >
> >> > > > +cdclk_state = intel_atomic_get_cdclk_state(state);
> >> > > > +if (IS_ERR(cdclk_state))
> >> > > > +return PTR_ERR(cdclk_state);
> >> > > > +
> >> > > >  /*
> >> > > >   * VDSC engine operates at 1 Pixel per clock, so if peak pixel 
> >> > > > rate
> >> > > > - * is greater than the maximum Cdclock and if slice count is 
> >> > > > even
> >> > > > + * is greater than the current Cdclock and if slice count is 
> >> > > > even
> >> > > >   * then we need to use 2 VDSC instances.
> >> > > >   */
> >> > > > -if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> >> > > > +if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> >> > > 
> >> > > This is wrong. We compute the cdclk based on the requirements of the
> >> > > mode/etc., not the other way around.
> >
> > According to BSpec guideline, we decide whether we enable or disable second 
> > VDSC engine, based
> > on that condition. As I understand that one is about DSC config 
> > calculation, based on CDCLK
> > which was calculated. 
> 
> Point is, at the time compute_config gets called, what guarantees are
> there that cdclk_state->actual.cdclk contains anything useful? This is
> the design we have.

That is actually good question, was willing to check that as well.

> 
> > If we bump up CDCLK, to avoid this, will we even then use a second VDSC 
> > ever?
> 
> I think we'll eventually need better logic than unconditionally bumping
> to max, and it needs to take *both* the cdclk and the number of dsc
> engines into account. The referenced bspec only has the vdsc clock
> perspective, not overall perspective.

What we need to clarify here is that how this is supposed to work in theory.
Basically same issue can be fixed by both increasing the CDCLK or enabling
2nd VDSC engine.
There should be some guideline telling us how to prioritize. 
>From overall perspective as I understand, by default, we are able to keep
CDCLK 2 times less than pixel rate(see intel_pixel_rate_to_cdclk), however
due to that VDSC limitation that it can use only 1 ppc this becomes not
applicable anymore (at least as of BSpec 49259), so we have to increase the amount
of VDSC instances then.

So the question is now - what is more optimal here?
Also if we bump up CDCLK(which we have done many times already in fact), we
then need to add some logic to inte

Re: [Intel-gfx] [PATCH 01/26] dma-buf: add dma_resv_for_each_fence_unlocked

2021-09-14 Thread Tvrtko Ursulin



On 14/09/2021 12:25, Christian König wrote:

Am 14.09.21 um 12:53 schrieb Tvrtko Ursulin:


On 13/09/2021 14:16, Christian König wrote:

Abstract the complexity of iterating over all the fences
in a dma_resv object.

The new loop handles the whole RCU and retry dance and
returns only fences where we can be sure we grabbed the
right one.

Signed-off-by: Christian König 
---
  drivers/dma-buf/dma-resv.c | 63 ++
  include/linux/dma-resv.h   | 36 ++
  2 files changed, 99 insertions(+)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 84fbe60629e3..213a9b7251ca 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -323,6 +323,69 @@ void dma_resv_add_excl_fence(struct dma_resv 
*obj, struct dma_fence *fence)

  }
  EXPORT_SYMBOL(dma_resv_add_excl_fence);
  +/**
+ * dma_resv_walk_unlocked - walk over fences in a dma_resv obj
+ * @obj: the dma_resv object
+ * @cursor: cursor to record the current position
+ * @all_fences: true returns also the shared fences
+ * @first: if we should start over
+ *
+ * Return all the fences in the dma_resv object which are not yet 
signaled.

+ * The returned fence has an extra local reference so will stay alive.
+ * If a concurrent modify is detected the whole iterator is started 
over again.

+ */
+struct dma_fence *dma_resv_walk_unlocked(struct dma_resv *obj,
+ struct dma_resv_cursor *cursor,
+ bool all_fences, bool first)
+{
+    struct dma_fence *fence = NULL;
+
+    do {
+    /* Drop the reference from the previous round */
+    dma_fence_put(fence);
+
+    cursor->is_first = first;
+    if (first) {
+    cursor->seq = read_seqcount_begin(&obj->seq);
+    cursor->index = -1;
+    cursor->fences = dma_resv_shared_list(obj);
+    cursor->is_exclusive = true;
+
+    fence = dma_resv_excl_fence(obj);
+    if (fence && test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+  &fence->flags))
+    fence = NULL;
+    } else {
+    fence = NULL;
+    }
+
+    if (fence) {
+    fence = dma_fence_get_rcu(fence);
+    } else if (all_fences && cursor->fences) {
+    struct dma_resv_list *fences = cursor->fences;


If rcu lock is allowed to be dropped while walking the list what 
guarantees list of fences hasn't been freed?


Ah, good point! We need to test the sequence number when we enter the 
function as well. Going to fix that.


Right, but just to say, I am still on the fence a bit on the concept of 
the unlocked iterator. So for now I am looking only superficially at the 
implementation and i915 side of things.






Like:

1st call
  -> gets seqcount
  -> stores cursor->fences

rcu lock dropped/re-acquired

2nd call
  -> dereferences into cursor->fences -> boom?


+
+    cursor->is_exclusive = false;
+    while (++cursor->index < fences->shared_count) {
+    fence = rcu_dereference(fences->shared[
+    cursor->index]);
+    if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+  &fence->flags))
+    break;
+    }
+    if (cursor->index < fences->shared_count)
+    fence = dma_fence_get_rcu(fence);
+    else
+    fence = NULL;
+    }
+
+    /* For the eventually next round */
+    first = true;
+    } while (read_seqcount_retry(&obj->seq, cursor->seq));
+
+    return fence;
+}
+EXPORT_SYMBOL_GPL(dma_resv_walk_unlocked);
+
  /**
   * dma_resv_copy_fences - Copy all fences from src to dst.
   * @dst: the destination reservation object
diff --git a/include/linux/dma-resv.h b/include/linux/dma-resv.h
index 9100dd3dc21f..f5b91c292ee0 100644
--- a/include/linux/dma-resv.h
+++ b/include/linux/dma-resv.h
@@ -149,6 +149,39 @@ struct dma_resv {
  struct dma_resv_list __rcu *fence;
  };
  +/**
+ * struct dma_resv_cursor - current position into the dma_resv fences
+ * @seq: sequence number to check
+ * @index: index into the shared fences
+ * @shared: the shared fences
+ * @is_first: true if this is the first returned fence
+ * @is_exclusive: if the current fence is the exclusive one
+ */
+struct dma_resv_cursor {
+    unsigned int seq;
+    unsigned int index;
+    struct dma_resv_list *fences;
+    bool is_first;


Is_first is useful to callers - like they are legitimately allowed to 
look inside this, what could otherwise be private object?


Yes, I was pondering on the same question. Key point is that this is 
only used by other dma_resv functions which also use cursor.fences for 
example.


So this is only supposed to be used by code working with other privates 
of the dma_resv object as well.


Hmmm and you think external callers have no legitimate case of detecting 
restarts? Or to better say will not have the need of distinguishing 
between real restarts and just the 

Re: [Intel-gfx] [PATCH 13/26] drm/i915: use the new iterator in i915_gem_busy_ioctl

2021-09-14 Thread Tvrtko Ursulin



On 13/09/2021 14:16, Christian König wrote:

This makes the function much simpler since the complex
retry logic is now handled else where.

Signed-off-by: Christian König 
---
  drivers/gpu/drm/i915/gem/i915_gem_busy.c | 30 +++-
  1 file changed, 9 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c 
b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
index 6234e17259c1..c6c6d747b33e 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_busy.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -82,8 +82,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
  {
struct drm_i915_gem_busy *args = data;
struct drm_i915_gem_object *obj;
-   struct dma_resv_list *list;
-   unsigned int seq;
+   struct dma_resv_cursor cursor;
+   struct dma_fence *fence;
int err;
  
  	err = -ENOENT;

@@ -109,28 +109,16 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
 * to report the overall busyness. This is what the wait-ioctl does.
 *
 */
-retry:
-   seq = raw_read_seqcount(&obj->base.resv->seq);
-
-   /* Translate the exclusive fence to the READ *and* WRITE engine */
-   args->busy = busy_check_writer(dma_resv_excl_fence(obj->base.resv));
-
-   /* Translate shared fences to READ set of engines */
-   list = dma_resv_shared_list(obj->base.resv);
-   if (list) {
-   unsigned int shared_count = list->shared_count, i;
-
-   for (i = 0; i < shared_count; ++i) {
-   struct dma_fence *fence =
-   rcu_dereference(list->shared[i]);
-
+   args->busy = false;
+   dma_resv_for_each_fence_unlocked(obj->base.resv, &cursor, true, fence) {


To preserve strictly identical behaviour, I think add before the below 
if-else:


if (cursor.is_first)
args->busy = 0;

As mentioned elsewhere in the thread I am not convinced it is hugely 
important, all that can happen without it is that stale read activity 
gets reported, and this ioctl is not really that sensitive to that.


Regards,

Tvrtko


+   if (cursor.is_exclusive)
+   /* Translate the exclusive fence to the READ *and* 
WRITE engine */
+   args->busy = busy_check_writer(fence);
+   else
+   /* Translate shared fences to READ set of engines */
args->busy |= busy_check_reader(fence);
-   }
}
  
-	if (args->busy && read_seqcount_retry(&obj->base.resv->seq, seq))

-   goto retry;
-
err = 0;
  out:
rcu_read_unlock();



Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Lisovskiy, Stanislav
On Tue, Sep 14, 2021 at 04:04:25PM +0300, Lisovskiy, Stanislav wrote:
> On Tue, Sep 14, 2021 at 03:04:11PM +0300, Jani Nikula wrote:
> > On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"  
> > wrote:
> > > On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
> > >> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
> > >> > > -Original Message-
> > >> > > From: Ville Syrjälä 
> > >> > > Sent: Tuesday, September 14, 2021 12:59 PM
> > >> > > To: Kulkarni, Vandita 
> > >> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani 
> > >> > > ;
> > >> > > Navare, Manasi D 
> > >> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> > >> > > engine for higher moderates
> > >> > > 
> > >> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
> > >> > > > Each VDSC operates with 1ppc throughput, hence enable the second 
> > >> > > > VDSC
> > >> > > > engine when the mode rate is higher than the current cdclk.
> > >> > > >
> > >> > > > Signed-off-by: Vandita Kulkarni 
> > >> > > > ---
> > >> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> > >> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
> > >> > > >
> > >> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> > >> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
> > >> > > > index 161c33b2c869..55878f65f724 100644
> > >> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> > >> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> > >> > > > @@ -70,6 +70,7 @@
> > >> > > >  #include "intel_tc.h"
> > >> > > >  #include "intel_vdsc.h"
> > >> > > >  #include "intel_vrr.h"
> > >> > > > +#include "intel_cdclk.h"
> > >> > > >
> > >> > > >  #define DP_DPRX_ESI_LEN 14
> > >> > > >
> > >> > > > @@ -1291,10 +1292,13 @@ static int 
> > >> > > > intel_dp_dsc_compute_config(struct
> > >> > > intel_dp *intel_dp,
> > >> > > >   struct drm_connector_state 
> > >> > > > *conn_state,
> > >> > > >   struct link_config_limits 
> > >> > > > *limits)  {
> > >> > > > +  struct intel_cdclk_state *cdclk_state;
> > >> > > >struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
> > >> > > >struct drm_i915_private *dev_priv = to_i915(dig_port-
> > >> > > >base.base.dev);
> > >> > > >const struct drm_display_mode *adjusted_mode =
> > >> > > >&pipe_config->hw.adjusted_mode;
> > >> > > > +  struct intel_atomic_state *state =
> > >> > > > +  to_intel_atomic_state(pipe_config-
> > >> > > >uapi.state);
> > >> > > >int pipe_bpp;
> > >> > > >int ret;
> > >> > > >
> > >> > > > @@ -1373,12 +1377,16 @@ static int 
> > >> > > > intel_dp_dsc_compute_config(struct
> > >> > > intel_dp *intel_dp,
> > >> > > >}
> > >> > > >}
> > >> > > >
> > >> > > > +  cdclk_state = intel_atomic_get_cdclk_state(state);
> > >> > > > +  if (IS_ERR(cdclk_state))
> > >> > > > +  return PTR_ERR(cdclk_state);
> > >> > > > +
> > >> > > >/*
> > >> > > > * VDSC engine operates at 1 Pixel per clock, so if peak pixel 
> > >> > > > rate
> > >> > > > -   * is greater than the maximum Cdclock and if slice count is 
> > >> > > > even
> > >> > > > +   * is greater than the current Cdclock and if slice count is 
> > >> > > > even
> > >> > > > * then we need to use 2 VDSC instances.
> > >> > > > */
> > >> > > > -  if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
> > >> > > > +  if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
> > >> > > 
> > >> > > This is wrong. We compute the cdclk based on the requirements of the
> > >> > > mode/etc., not the other way around.
> > >
> > > According to BSpec guideline, we decide whether we enable or disable 
> > > second VDSC engine, based
> > > on that condition. As I understand that one is about DSC config 
> > > calculation, based on CDCLK
> > > which was calculated. 
> > 
> > Point is, at the time compute_config gets called, what guarantees are
> > there that cdclk_state->actual.cdclk contains anything useful? This is
> > the design we have.
> 
> That is actually good question, was willing to check that as well.
> 
> > 
> > > If we bump up CDCLK, to avoid this, will we even then use a second VDSC 
> > > ever?
> > 
> > I think we'll eventually need better logic than unconditionally bumping
> > to max, and it needs to take *both* the cdclk and the number of dsc
> > engines into account. The referenced bspec only has the vdsc clock
> > perspective, not overall perspective.
> 
> What we need to clarify here is that how this is supposed to work in theory.
> Basically same issue can be fixed by both increasing the CDCLK or enabling
> 2nd VDSC engine.
> There should be some guideline telling us, how to prioritize. 
> From overall perspective as I understand, by default, we are able to keep
> CDCLK 2 times less than pixel rate(see intel_pixel_rate_to_cdclk), however
> due to that VDSC limitation that it can use only 1 ppc this be

[Intel-gfx] ✗ Fi.CI.BUILD: failure for Add support for querying hw info that UMDs need (rev3)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Add support for querying hw info that UMDs need (rev3)
URL   : https://patchwork.freedesktop.org/series/94305/
State : failure

== Summary ==

Applying: drm/i915/guc: Add fetch of hwconfig table
error: sha1 information is lacking or useless (drivers/gpu/drm/i915/Makefile).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 drm/i915/guc: Add fetch of hwconfig table
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




[Intel-gfx] ✗ Fi.CI.BUILD: failure for lib, stackdepot: Add helper to print stack entries into buffer.

2021-09-14 Thread Patchwork
== Series Details ==

Series: lib, stackdepot: Add helper to print stack entries into buffer.
URL   : https://patchwork.freedesktop.org/series/94655/
State : failure

== Summary ==

Applying: lib, stackdepot: Add helper to print stack entries into buffer.
error: sha1 information is lacking or useless (include/linux/stackdepot.h).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 lib, stackdepot: Add helper to print stack entries into 
buffer.
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




[Intel-gfx] ✗ Fi.CI.BUILD: failure for Move vfio_ccw to the new mdev API (rev2)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Move vfio_ccw to the new mdev API (rev2)
URL   : https://patchwork.freedesktop.org/series/94520/
State : failure

== Summary ==

Applying: Move vfio_ccw to the new mdev API
error: sha1 information is lacking or useless (drivers/s390/cio/vfio_ccw_fsm.c).
error: could not build fake ancestor
hint: Use 'git am --show-current-patch=diff' to see the failed patch
Patch failed at 0001 Move vfio_ccw to the new mdev API
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




Re: [Intel-gfx] [PATCH v2] kernel/locking: Add context to ww_mutex_trylock.

2021-09-14 Thread Daniel Vetter
On Tue, Sep 14, 2021 at 02:43:02PM +0200, Maarten Lankhorst wrote:
> Op 14-09-2021 om 08:50 schreef Peter Zijlstra:
> > On Mon, Sep 13, 2021 at 10:42:36AM +0200, Maarten Lankhorst wrote:
> >
> >>> +/**
> >>> + * ww_mutex_trylock - tries to acquire the w/w mutex with optional 
> >>> acquire context
> >>> + * @ww: mutex to lock
> >>> + * @ww_ctx: optional w/w acquire context
> >>> + *
> >>> + * Trylocks a mutex with the optional acquire context; no deadlock 
> >>> detection is
> >>> + * possible. Returns 1 if the mutex has been acquired successfully, 0 
> >>> otherwise.
> >>> + *
> >>> + * Unlike ww_mutex_lock, no deadlock handling is performed. However, if 
> >>> a @ctx is
> >>> + * specified, -EALREADY handling may happen in calls to ww_mutex_trylock.
> >>> + *
> >>> + * A mutex acquired with this function must be released with 
> >>> ww_mutex_unlock.
> >>> + */
> >>> +int ww_mutex_trylock(struct ww_mutex *ww, struct ww_acquire_ctx *ww_ctx)
> >>> +{
> >>> + if (!ww_ctx)
> >>> + return mutex_trylock(&ww->base);
> >>> +
> >>> + MUTEX_WARN_ON(ww->base.magic != &ww->base);
> >>> +
> >>> + if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
> >>> + return -EALREADY;
> >> I'm not 100% sure this is a good idea, because it would make the
> >> trylock weird.  For i915 I checked manually, because I didn't want to
> >> change the function signature. This is probably the other extreme.
> >>
> >> "if (ww_mutex_trylock())" would look correct, but actually be wrong
> >> and lead to double unlock without adjustments.  Maybe we could make a
> >> ww_mutex_trylock_ctx_err, which would return -EALREADY or -EBUSY on
> >> failure, and 0 on success?  We could keep ww_mutex_trylock without
> >> ctx, probably just #define as (!ww_mutex_trylock_ctx_err(lock, NULL))
> > Urgh, yeah. Also, I suppose that if we already own it, we'll just fail
> > the trylock anyway. Let me take this out.
> >
> >>> + /*
> >>> +  * Reset the wounded flag after a kill. No other process can
> >>> +  * race and wound us here, since they can't have a valid owner
> >>> +  * pointer if we don't have any locks held.
> >>> +  */
> >>> + if (ww_ctx->acquired == 0)
> >>> + ww_ctx->wounded = 0;
> >> Yeah I guess this needs fixing too. Not completely sure since trylock
> >> wouldn't do the whole ww dance, but since it's our first lock,
> >> probably best to do so regardless so other users don't trip over it.
> > This is actually critical, because if this trylock is the first lock
> > acquisition for the context, there won't be any other opportunity to
> > reset this value.
> >
> >>> +
> >>> + if (__mutex_trylock(&ww->base)) {
> >>> + ww_mutex_set_context_fastpath(ww, ww_ctx);
> >>> + mutex_acquire_nest(&ww->base.dep_map, 0, 1, &ww_ctx->dep_map, 
> >>> _RET_IP_);
> >>> + return 1;
> >>> + }
> >>> +
> >>> + return 0;
> >>> +}
> >>> +EXPORT_SYMBOL(ww_mutex_trylock);
> > Updated version below...
> >
> > ---
> > Subject: kernel/locking: Add context to ww_mutex_trylock()
> > From: Maarten Lankhorst 
> > Date: Thu, 9 Sep 2021 11:32:18 +0200
> >
> > From: Maarten Lankhorst 
> >
> > i915 will soon gain an eviction path that trylock a whole lot of locks
> > for eviction, getting dmesg failures like below:
> >
> >   BUG: MAX_LOCK_DEPTH too low!
> >   turning off the locking correctness validator.
> >   depth: 48  max: 48!
> >   48 locks held by i915_selftest/5776:
> >#0: 888101a79240 (&dev->mutex){}-{3:3}, at: 
> > __driver_attach+0x88/0x160
> >#1: c99778c0 (reservation_ww_class_acquire){+.+.}-{0:0}, at: 
> > i915_vma_pin.constprop.63+0x39/0x1b0 [i915]
> >#2: 88800cf74de8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> > i915_vma_pin.constprop.63+0x5f/0x1b0 [i915]
> >#3: 88810c7f9e38 (&vm->mutex/1){+.+.}-{3:3}, at: 
> > i915_vma_pin_ww+0x1c4/0x9d0 [i915]
> >#4: 88810bad5768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> > i915_gem_evict_something+0x110/0x860 [i915]
> >#5: 88810bad60e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> > i915_gem_evict_something+0x110/0x860 [i915]
> >   ...
> >#46: 88811964d768 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> > i915_gem_evict_something+0x110/0x860 [i915]
> >#47: 88811964e0e8 (reservation_ww_class_mutex){+.+.}-{3:3}, at: 
> > i915_gem_evict_something+0x110/0x860 [i915]
> >   INFO: lockdep is turned off.
> >
> > Fixing eviction to nest into ww_class_acquire is a high priority, but
> > it requires a rework of the entire driver, which can only be done one
> > step at a time.
> >
> > As an intermediate solution, add an acquire context to
> > ww_mutex_trylock, which allows us to do proper nesting annotations on
> > the trylocks, making the above lockdep splat disappear.
> >
> > This is also useful in regulator_lock_nested, which may avoid dropping
> > regulator_nesting_mutex in the uncontended path, so use it there.
> >
> > TTM may be another user for this, where we could lock a buffer in a
> fastpath with list locks held.

Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Jani Nikula
On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"  
wrote:
> On Tue, Sep 14, 2021 at 04:04:25PM +0300, Lisovskiy, Stanislav wrote:
>> On Tue, Sep 14, 2021 at 03:04:11PM +0300, Jani Nikula wrote:
>> > On Tue, 14 Sep 2021, "Lisovskiy, Stanislav" 
>> >  wrote:
>> > > On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
>> > >> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
>> > >> > > -Original Message-
>> > >> > > From: Ville Syrjälä 
>> > >> > > Sent: Tuesday, September 14, 2021 12:59 PM
>> > >> > > To: Kulkarni, Vandita 
>> > >> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani 
>> > >> > > ;
>> > >> > > Navare, Manasi D 
>> > >> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second 
>> > >> > > VDSC
>> > >> > > engine for higher moderates
>> > >> > > 
>> > >> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni wrote:
>> > >> > > > Each VDSC operates with 1ppc throughput, hence enable the second 
>> > >> > > > VDSC
>> > >> > > > engine when moderate is higher than the current cdclk.
>> > >> > > >
>> > >> > > > Signed-off-by: Vandita Kulkarni 
>> > >> > > > ---
>> > >> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
>> > >> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
>> > >> > > >
>> > >> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
>> > >> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
>> > >> > > > index 161c33b2c869..55878f65f724 100644
>> > >> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
>> > >> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
>> > >> > > > @@ -70,6 +70,7 @@
>> > >> > > >  #include "intel_tc.h"
>> > >> > > >  #include "intel_vdsc.h"
>> > >> > > >  #include "intel_vrr.h"
>> > >> > > > +#include "intel_cdclk.h"
>> > >> > > >
>> > >> > > >  #define DP_DPRX_ESI_LEN 14
>> > >> > > >
>> > >> > > > @@ -1291,10 +1292,13 @@ static int 
>> > >> > > > intel_dp_dsc_compute_config(struct
>> > >> > > intel_dp *intel_dp,
>> > >> > > >  struct drm_connector_state 
>> > >> > > > *conn_state,
>> > >> > > >  struct link_config_limits 
>> > >> > > > *limits)  {
>> > >> > > > + struct intel_cdclk_state *cdclk_state;
>> > >> > > >   struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
>> > >> > > >   struct drm_i915_private *dev_priv = to_i915(dig_port-
>> > >> > > >base.base.dev);
>> > >> > > >   const struct drm_display_mode *adjusted_mode =
>> > >> > > >   &pipe_config->hw.adjusted_mode;
>> > >> > > > + struct intel_atomic_state *state =
>> > >> > > > + to_intel_atomic_state(pipe_config-
>> > >> > > >uapi.state);
>> > >> > > >   int pipe_bpp;
>> > >> > > >   int ret;
>> > >> > > >
>> > >> > > > @@ -1373,12 +1377,16 @@ static int 
>> > >> > > > intel_dp_dsc_compute_config(struct
>> > >> > > intel_dp *intel_dp,
>> > >> > > >   }
>> > >> > > >   }
>> > >> > > >
>> > >> > > > + cdclk_state = intel_atomic_get_cdclk_state(state);
>> > >> > > > + if (IS_ERR(cdclk_state))
>> > >> > > > + return PTR_ERR(cdclk_state);
>> > >> > > > +
>> > >> > > >   /*
>> > >> > > >* VDSC engine operates at 1 Pixel per clock, so if peak pixel 
>> > >> > > > rate
>> > >> > > > -  * is greater than the maximum Cdclock and if slice count is 
>> > >> > > > even
>> > >> > > > +  * is greater than the current Cdclock and if slice count is 
>> > >> > > > even
>> > >> > > >* then we need to use 2 VDSC instances.
>> > >> > > >*/
>> > >> > > > - if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq ||
>> > >> > > > + if (adjusted_mode->crtc_clock > cdclk_state->actual.cdclk ||
>> > >> > > 
>> > >> > > This is wrong. We compute the cdclk based on the requirements of the
>> > >> > > mode/etc., not the other way around.
>> > >
>> > > According to BSpec guideline, we decide whether we enable or disable 
>> > > second VDSC engine, based
>> > > on that condition. As I understand that one is about DSC config 
>> > > calculation, based on CDCLK
>> > > which was calculated. 
>> > 
>> > Point is, at the time compute_config gets called, what guarantees are
>> > there that cdclk_state->actual.cdclk contains anything useful? This is
>> > the design we have.
>> 
>> That is actually good question, was willing to check that as well.
>> 
>> > 
>> > > If we bump up CDCLK, to avoid this, will we even then use a second VDSC 
>> > > ever?
>> > 
>> > I think we'll eventually need better logic than unconditionally bumping
>> > to max, and it needs to take *both* the cdclk and the number of dsc
>> > engines into account. The referenced bspec only has the vdsc clock
>> > perspective, not overall perspective.
>> 
>> What we need to clarify here is that how this is supposed to work in theory.
>> Basically same issue can be fixed by both increasing the CDCLK or enabling
>> 2nd VDSC engine.
>> There should be some guideline telling us, how to prioritize. 
>> From overall perspective as I understand, by default, we are ab

Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Daniel Vetter
On Tue, Sep 14, 2021 at 12:38:00PM +0200, Thomas Hellström wrote:
> On Tue, 2021-09-14 at 10:53 +0200, Christian König wrote:
> > Am 14.09.21 um 10:27 schrieb Thomas Hellström:
> > > On Tue, 2021-09-14 at 09:40 +0200, Christian König wrote:
> > > > Am 13.09.21 um 14:41 schrieb Thomas Hellström:
> > > > > [SNIP]
> > > > > > > > Let's say you have a struct ttm_object_vram and a struct
> > > > > > > > ttm_object_gtt, both subclassing drm_gem_object. Then I'd
> > > > > > > > say
> > > > > > > > a
> > > > > > > > driver would want to subclass those to attach identical
> > > > > > > > data,
> > > > > > > > extend functionality and provide a single i915_gem_object
> > > > > > > > to
> > > > > > > > the
> > > > > > > > rest of the driver, which couldn't care less whether it's
> > > > > > > > vram or
> > > > > > > > gtt? Wouldn't you say having separate struct
> > > > > > > > ttm_object_vram
> > > > > > > > and a
> > > > > > > > struct ttm_object_gtt in this case would be awkward?. We
> > > > > > > > *want* to
> > > > > > > > allow common handling.
> > > > > > > Yeah, but that's a bad idea. This is like diamond
> > > > > > > inheritance
> > > > > > > in C++.
> > > > > > > 
> > > > > > > When you need the same functionality in different backends
> > > > > > > you
> > > > > > > implement that as separate object and then add a parent
> > > > > > > class.
> > > > > > > 
> > > > > > > > It's the exact same situation here. With struct
> > > > > > > > ttm_resource
> > > > > > > > you
> > > > > > > > let *different* implementation flavours subclass it,
> > > > > > > > which
> > > > > > > > makes it
> > > > > > > > awkward for the driver to extend the functionality in a
> > > > > > > > common way
> > > > > > > > by subclassing, unless the driver only uses a single
> > > > > > > > implementation.
> > > > > > > Well the driver should use separate implementations for
> > > > > > > their
> > > > > > > different domains as much as possible.
> > > > > > > 
> > > > > > Hmm, Now you lost me a bit. Are you saying that the way we do
> > > > > > dynamic
> > > > > > backends in the struct ttm_buffer_object to facilitate driver
> > > > > > subclassing is a bad idea or that the RFC with backpointer is
> > > > > > a
> > > > > > bad
> > > > > > idea?
> > > > > > 
> > > > > > 
> > > > > Or if you mean diamond inheritance is bad, yes that's basically
> > > > > my
> > > > > point.
> > > > That diamond inheritance is a bad idea. What I don't understand
> > > > is
> > > > why
> > > > you need that in the first place?
> > > > 
> > > > Information that you attach to a resource are specific to the
> > > > domain
> > > > where the resource is allocated from. So why do you want to
> > > > attach
> > > > the
> > > > same information to a resources from different domains?
> > > Again, for the same reason that we do that with struct
> > > i915_gem_objects
> > > and struct ttm_tts, to extend the functionality. I mean information
> > > that we attach when we subclass a struct ttm_buffer_object doesn't
> > > necessarily care about whether it's a VRAM or a GTT object. In
> > > exactly
> > > the same way, information that we want to attach to a struct
> > > ttm_resource doesn't necessarily care whether it's a system or a
> > > VRAM
> > > resource, and need not be specific to any of those.
> > > 
> > > In this particular case, as memory management becomes asynchronous,
> > > you
> > > can't attach things like sg-tables and gpu binding information to
> > > the
> > > gem object anymore, because the object may have a number of
> > > migrations
> > > in the pipeline. Such things need to be attached to the structure
> > > that
> > > abstracts the memory allocation, and which may have a completely
> > > different lifetime than the object itself.
> > > 
> > > In our particular case we want to attach information for cached
> > > page
> > > lookup and and sg-table, and moving forward probably the gpu
> > > binding
> > > (vma) information, and that is the same information for any
> > > ttm_resource regardless where it's allocated from.
> > > 
> > > Typical example: A pipelined GPU operation happening before an
> > > async
> > > eviction goes wrong. We need to error capture and reset. But if we
> > > look
> > > at the object for error capturing, it's already updated pointing to
> > > an
> > > after-eviction resource, and the resource sits on a ghost object
> > > (or in
> > > the future when ghost objects go away perhaps in limbo somewhere).
> > > 
> > > We need to capture the memory pointed to by the struct ttm_resource
> > > the
> > > GPU was referencing, and to be able to do that we need to cache
> > > driver-
> > > specific info on the resource. Typically an sg-list and GPU binding
> > > information.
> > > 
> > > Anyway, that cached information needs to be destroyed together with
> > > the
> > > resource and thus we need to be able to access that information
> > > from
> > > the resource in some way, regardless whether it's a pointer or
> > > whether
> > > we embed 

Re: [Intel-gfx] [PATCH 2/4] drm/i915/guc: Do error capture asynchronously

2021-09-14 Thread Daniel Vetter
On Mon, Sep 13, 2021 at 10:09:54PM -0700, Matthew Brost wrote:
> An error capture allocates memory, memory allocations depend on resets,
> and resets need to flush the G2H handlers to seal several races. If the
> error capture is done from the G2H handler this creates a circular
> dependency. To work around this, do a error capture in a work queue
> asynchronously from the G2H handler. This should be fine as (eventually)
> all register state is put into a buffer by the GuC so it is safe to
> restart the context before the error capture is complete.
> 
> Example of lockdep splat below:

Pushing work into a work_struct to fix a lockdep splat does nothing more
than hide the lockdep splat. Or it creates a race.

So no, let's not make this more of a mess than it already is please.
-Daniel

> 
> [  154.625989] ==
> [  154.632195] WARNING: possible circular locking dependency detected
> [  154.638393] 5.14.0-rc5-guc+ #50 Tainted: G U
> [  154.643991] --
> [  154.650196] i915_selftest/1673 is trying to acquire lock:
> [  154.655621] 8881079cb918 
> ((work_completion)(&ct->requests.worker)){+.+.}-{0:0}, at: 
> __flush_work+0x350/0x4d0 [  154.665826]
>but task is already holding lock:
> [  154.671682] 8881079cbfb8 (>->reset.mutex){+.+.}-{3:3}, at: 
> intel_gt_reset+0xf0/0x300 [i915] [  154.680659]
>which lock already depends on the new lock.
> 
> [  154.688857]
>the existing dependency chain (in reverse order) is:
> [  154.696365]
>-> #2 (>->reset.mutex){+.+.}-{3:3}:
> [  154.702571]lock_acquire+0xd2/0x300
> [  154.706695]i915_gem_shrinker_taints_mutex+0x2d/0x50 [i915]
> [  154.712959]intel_gt_init_reset+0x61/0x80 [i915]
> [  154.718258]intel_gt_init_early+0xe6/0x120 [i915]
> [  154.723648]i915_driver_probe+0x592/0xdc0 [i915]
> [  154.728942]i915_pci_probe+0x43/0x1c0 [i915]
> [  154.733891]pci_device_probe+0x9b/0x110
> [  154.738362]really_probe+0x1a6/0x3a0
> [  154.742568]__driver_probe_device+0xf9/0x170
> [  154.747468]driver_probe_device+0x19/0x90
> [  154.752114]__driver_attach+0x99/0x170
> [  154.756492]bus_for_each_dev+0x73/0xc0
> [  154.760870]bus_add_driver+0x14b/0x1f0
> [  154.765248]driver_register+0x67/0xb0
> [  154.769542]i915_init+0x18/0x8c [i915]
> [  154.773964]do_one_initcall+0x53/0x2e0
> [  154.778343]do_init_module+0x56/0x210
> [  154.782639]load_module+0x25fc/0x29f0
> [  154.786934]__do_sys_finit_module+0xae/0x110
> [  154.791835]do_syscall_64+0x38/0xc0
> [  154.795958]entry_SYSCALL_64_after_hwframe+0x44/0xae
> [  154.801558]
>-> #1 (fs_reclaim){+.+.}-{0:0}:
> [  154.807241]lock_acquire+0xd2/0x300
> [  154.811361]fs_reclaim_acquire+0x9e/0xd0
> [  154.815914]kmem_cache_alloc_trace+0x30/0x790
> [  154.820899]i915_gpu_coredump_alloc+0x53/0x1a0 [i915]
> [  154.826649]i915_gpu_coredump+0x39/0x560 [i915]
> [  154.831866]i915_capture_error_state+0xa/0x70 [i915]
> [  154.837513]intel_guc_context_reset_process_msg+0x174/0x1f0 [i915]
> [  154.844383]ct_incoming_request_worker_func+0x130/0x1b0 [i915]
> [  154.850898]process_one_work+0x264/0x590
> [  154.855451]worker_thread+0x4b/0x3a0
> [  154.859655]kthread+0x147/0x170
> [  154.863428]ret_from_fork+0x1f/0x30
> [  154.867548]
>-> #0 ((work_completion)(&ct->requests.worker)){+.+.}-{0:0}:
> [  154.875747]check_prev_add+0x90/0xc30
> [  154.880042]__lock_acquire+0x1643/0x2110
> [  154.884595]lock_acquire+0xd2/0x300
> [  154.888715]__flush_work+0x373/0x4d0
> [  154.892920]intel_guc_submission_reset_prepare+0xf3/0x340 [i915]
> [  154.899606]intel_uc_reset_prepare+0x40/0x50 [i915]
> [  154.905166]reset_prepare+0x55/0x60 [i915]
> [  154.909946]intel_gt_reset+0x11c/0x300 [i915]
> [  154.914984]do_device_reset+0x13/0x20 [i915]
> [  154.919936]check_whitelist_across_reset+0x166/0x250 [i915]
> [  154.926212]live_reset_whitelist.cold+0x6a/0x7a [i915]
> [  154.932037]__i915_subtests.cold+0x20/0x74 [i915]
> [  154.937428]__run_selftests.cold+0x96/0xee [i915]
> [  154.942816]i915_live_selftests+0x2c/0x60 [i915]
> [  154.948125]i915_pci_probe+0x93/0x1c0 [i915]
> [  154.953076]pci_device_probe+0x9b/0x110
> [  154.957545]really_probe+0x1a6/0x3a0
> [  154.961749]__driver_probe_device+0xf9/0x170
> [  154.966653]driver_probe_device+0x19/0x90
> [  154.971290]__driver_attach+0x99/0x170
> [  154.975671]bus_for_each_dev+0x73/0xc0
> [  154.980053]bus_add_driver+0x14b/0x1f0
> [  154.984431]driver_register+0x67/0xb0
> [  154.988725]   

Re: [Intel-gfx] [PATCH 4/4] drm/i915/guc: Refcount context during error capture

2021-09-14 Thread Daniel Vetter
On Mon, Sep 13, 2021 at 10:09:56PM -0700, Matthew Brost wrote:
> From: John Harrison 
> 
> When i915 receives a context reset notification from GuC, it triggers
> an error capture before resetting any outstanding requests of that
> context. Unfortunately, the error capture is not a time bound
> operation. In certain situations it can take a long time, particularly
> when multiple large LMEM buffers must be read back and encoded. If
> this delay is longer than other timeouts (heartbeat, test recovery,
> etc.) then a full GT reset can be triggered in the middle.
> 
> That can result in the context being reset by GuC actually being
> destroyed before the error capture completes and the GuC submission
> code resumes. Thus, the GuC side can start dereferencing stale
> pointers and Bad Things ensue.
> 
> So add a refcount get of the context during the entire reset
> operation. That way, the context can't be destroyed part way through
> no matter what other resets or user interactions occur.
> 
> v2:
>  (Matthew Brost)
>   - Update patch to work with async error capture
> 
> Signed-off-by: John Harrison 
> Signed-off-by: Matthew Brost 

This sounds like a fundamental issue in our reset/scheduler design. If we
have multiple timeout-things working in parallel, then there's going to be
an endless whack-a-mole fireworks show.

Reset is not a perf critical path (aside from media timeout, which guc
handles internally anyway). Simplicity trumps everything else. The fix
here is to guarantee that anything related to reset cannot happen in
parallel with anything else related to reset/timeout. At least on a
per-engine (and really on a per-reset domain) basis.

The fix we've developed for drm/sched is that the driver can allocate a
single-thread work queue, pass it to each drm/sched instance, and all
timeout handling is run in there.

For i915 it's more of a mess since we have a ton of random things that
time out/reset potentially going on in parallel. But that's the design we
should head towards.

_not_ sprinkling random refcounts all over the place until most of the
oops/splats disappear. That's cargo-culting, not engineering.
-Daniel

> ---
>  .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 24 +--
>  1 file changed, 22 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 1986a57b52cc..02917fc4d4a8 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -2888,6 +2888,8 @@ static void capture_worker_func(struct work_struct *w)
>   intel_engine_set_hung_context(engine, ce);
>   with_intel_runtime_pm(&i915->runtime_pm, wakeref)
>   i915_capture_error_state(gt, ce->engine->mask);
> +
> + intel_context_put(ce);
>  }
>  
>  static void capture_error_state(struct intel_guc *guc,
> @@ -2924,7 +2926,7 @@ static void guc_context_replay(struct intel_context *ce)
>   tasklet_hi_schedule(&sched_engine->tasklet);
>  }
>  
> -static void guc_handle_context_reset(struct intel_guc *guc,
> +static bool guc_handle_context_reset(struct intel_guc *guc,
>struct intel_context *ce)
>  {
>   trace_intel_context_reset(ce);
> @@ -2937,7 +2939,11 @@ static void guc_handle_context_reset(struct intel_guc 
> *guc,
>  !context_blocked(ce))) {
>   capture_error_state(guc, ce);
>   guc_context_replay(ce);
> +
> + return false;
>   }
> +
> + return true;
>  }
>  
>  int intel_guc_context_reset_process_msg(struct intel_guc *guc,
> @@ -2945,6 +2951,7 @@ int intel_guc_context_reset_process_msg(struct 
> intel_guc *guc,
>  {
>   struct intel_context *ce;
>   int desc_idx;
> + unsigned long flags;
>  
>   if (unlikely(len != 1)) {
>   drm_err(&guc_to_gt(guc)->i915->drm, "Invalid length %u", len);
> @@ -2952,11 +2959,24 @@ int intel_guc_context_reset_process_msg(struct 
> intel_guc *guc,
>   }
>  
>   desc_idx = msg[0];
> +
> + /*
> +  * The context lookup uses the xarray but lookups only require an RCU 
> lock
> +  * not the full spinlock. So take the lock explicitly and keep it until 
> the
> +  * context has been reference count locked to ensure it can't be 
> destroyed
> +  * asynchronously until the reset is done.
> +  */
> + xa_lock_irqsave(&guc->context_lookup, flags);
>   ce = g2h_context_lookup(guc, desc_idx);
> + if (ce)
> + intel_context_get(ce);
> + xa_unlock_irqrestore(&guc->context_lookup, flags);
> +
>   if (unlikely(!ce))
>   return -EPROTO;
>  
> - guc_handle_context_reset(guc, ce);
> + if (guc_handle_context_reset(guc, ce))
> + intel_context_put(ce);
>  
>   return 0;
>  }
> -- 
> 2.32.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Kulkarni, Vandita
> -Original Message-
> From: Nikula, Jani 
> Sent: Tuesday, September 14, 2021 7:33 PM
> To: Lisovskiy, Stanislav 
> Cc: Ville Syrjälä ; Kulkarni, Vandita
> ; intel-gfx@lists.freedesktop.org; Navare,
> Manasi D 
> Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
> engine for higher moderates
> 
> On Tue, 14 Sep 2021, "Lisovskiy, Stanislav" 
> wrote:
> > On Tue, Sep 14, 2021 at 04:04:25PM +0300, Lisovskiy, Stanislav wrote:
> >> On Tue, Sep 14, 2021 at 03:04:11PM +0300, Jani Nikula wrote:
> >> > On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"
>  wrote:
> >> > > On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
> >> > >> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
> >> > >> > > -Original Message-
> >> > >> > > From: Ville Syrjälä 
> >> > >> > > Sent: Tuesday, September 14, 2021 12:59 PM
> >> > >> > > To: Kulkarni, Vandita 
> >> > >> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani
> >> > >> > > ; Navare, Manasi D
> >> > >> > > 
> >> > >> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable
> >> > >> > > second VDSC engine for higher moderates
> >> > >> > >
> >> > >> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni
> wrote:
> >> > >> > > > Each VDSC operates with 1ppc throughput, hence enable the
> >> > >> > > > second VDSC engine when moderate is higher than the current
> cdclk.
> >> > >> > > >
> >> > >> > > > Signed-off-by: Vandita Kulkarni
> >> > >> > > > 
> >> > >> > > > ---
> >> > >> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
> >> > >> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
> >> > >> > > >
> >> > >> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
> >> > >> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
> >> > >> > > > index 161c33b2c869..55878f65f724 100644
> >> > >> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
> >> > >> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
> >> > >> > > > @@ -70,6 +70,7 @@
> >> > >> > > >  #include "intel_tc.h"
> >> > >> > > >  #include "intel_vdsc.h"
> >> > >> > > >  #include "intel_vrr.h"
> >> > >> > > > +#include "intel_cdclk.h"
> >> > >> > > >
> >> > >> > > >  #define DP_DPRX_ESI_LEN 14
> >> > >> > > >
> >> > >> > > > @@ -1291,10 +1292,13 @@ static int
> >> > >> > > > intel_dp_dsc_compute_config(struct
> >> > >> > > intel_dp *intel_dp,
> >> > >> > > >struct 
> >> > >> > > > drm_connector_state
> *conn_state,
> >> > >> > > >struct 
> >> > >> > > > link_config_limits *limits)  {
> >> > >> > > > +   struct intel_cdclk_state *cdclk_state;
> >> > >> > > > struct intel_digital_port *dig_port =
> dp_to_dig_port(intel_dp);
> >> > >> > > > struct drm_i915_private *dev_priv = to_i915(dig_port-
> >> > >> > > >base.base.dev);
> >> > >> > > > const struct drm_display_mode *adjusted_mode =
> >> > >> > > > &pipe_config->hw.adjusted_mode;
> >> > >> > > > +   struct intel_atomic_state *state =
> >> > >> > > > +   
> >> > >> > > > to_intel_atomic_state(pipe_config-
> >> > >> > > >uapi.state);
> >> > >> > > > int pipe_bpp;
> >> > >> > > > int ret;
> >> > >> > > >
> >> > >> > > > @@ -1373,12 +1377,16 @@ static int
> >> > >> > > > intel_dp_dsc_compute_config(struct
> >> > >> > > intel_dp *intel_dp,
> >> > >> > > > }
> >> > >> > > > }
> >> > >> > > >
> >> > >> > > > +   cdclk_state = intel_atomic_get_cdclk_state(state);
> >> > >> > > > +   if (IS_ERR(cdclk_state))
> >> > >> > > > +   return PTR_ERR(cdclk_state);
> >> > >> > > > +
> >> > >> > > > /*
> >> > >> > > >  * VDSC engine operates at 1 Pixel per clock, so if 
> >> > >> > > > peak pixel
> rate
> >> > >> > > > -* is greater than the maximum Cdclock and if slice 
> >> > >> > > > count is
> even
> >> > >> > > > +* is greater than the current Cdclock and if slice
> >> > >> > > > +count is even
> >> > >> > > >  * then we need to use 2 VDSC instances.
> >> > >> > > >  */
> >> > >> > > > -   if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq
> ||
> >> > >> > > > +   if (adjusted_mode->crtc_clock >
> >> > >> > > > +cdclk_state->actual.cdclk ||
> >> > >> > >
> >> > >> > > This is wrong. We compute the cdclk based on the
> >> > >> > > requirements of the mode/etc., not the other way around.
> >> > >
> >> > > According to BSpec guideline, we decide whether we enable or
> >> > > disable second VDSC engine, based on that condition. As I
> >> > > understand that one is about DSC config calculation, based on CDCLK
> which was calculated.
> >> >
> >> > Point is, at the time compute_config gets called, what guarantees
> >> > are there that cdclk_state->actual.cdclk contains anything useful?
> >> > This is the design we have.
> >>
> >> That is actually good question, was willing to check that as well.
> >>
> >> >
> >> > > If we

Re: [Intel-gfx] [PATCH 1/4] drm/i915: rename debugfs_gt files

2021-09-14 Thread Lucas De Marchi

On Tue, Sep 14, 2021 at 12:16:13PM +0300, Jani Nikula wrote:

On Wed, 08 Sep 2021, Lucas De Marchi  wrote:

We shouldn't be using debugfs_ namespace for this functionality. Rename
debugfs_gt.[ch] to intel_gt_debugfs.[ch] and then make functions,
defines and structs follow suit.

While at it and since we are renaming the header, sort the includes
alphabetically.


I didn't do a detailed review, maybe someone should, but superficially
seems good. On the series,

Acked-by: Jani Nikula 


thanks. What about the question I raised on

intel_gt_register_debugfs() vs
intel_gt_debugfs_register()?

and

intel_gt_pm_register_debugfs() vs
intel_gt_pm_debugfs_register()?

what would be prefered here?

thanks
Lucas De Marchi


Re: [Intel-gfx] [PATCH 1/4] drm/i915: rename debugfs_gt files

2021-09-14 Thread Jani Nikula
On Tue, 14 Sep 2021, Lucas De Marchi  wrote:
> On Tue, Sep 14, 2021 at 12:16:13PM +0300, Jani Nikula wrote:
>>On Wed, 08 Sep 2021, Lucas De Marchi  wrote:
>>> We shouldn't be using debugfs_ namespace for this functionality. Rename
>>> debugfs_gt.[ch] to intel_gt_debugfs.[ch] and then make functions,
>>> defines and structs follow suit.
>>>
>>> While at it and since we are renaming the header, sort the includes
>>> alphabetically.
>>
>>I didn't do a detailed review, maybe someone should, but superficially
>>seems good. On the series,
>>
>>Acked-by: Jani Nikula 
>
> thanks. What about the question I raised on
>
> intel_gt_register_debugfs() vs
> intel_gt_debugfs_register()?
>
> and
>
> intel_gt_pm_register_debugfs() vs
> intel_gt_pm_debugfs_register()?
>
> what would be prefered here?

Nowadays I try to juggle the function names to have the prefix match the
filename. I think the driver has grown so big that we need that to help
with our mental model of what goes where. Beyond that, whatever flows
more naturally when you say it out aloud.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC engine for higher moderates

2021-09-14 Thread Jani Nikula
On Tue, 14 Sep 2021, "Kulkarni, Vandita"  wrote:
>> -Original Message-
>> From: Nikula, Jani 
>> Sent: Tuesday, September 14, 2021 7:33 PM
>> To: Lisovskiy, Stanislav 
>> Cc: Ville Syrjälä ; Kulkarni, Vandita
>> ; intel-gfx@lists.freedesktop.org; Navare,
>> Manasi D 
>> Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable second VDSC
>> engine for higher moderates
>> 
>> On Tue, 14 Sep 2021, "Lisovskiy, Stanislav" 
>> wrote:
>> > On Tue, Sep 14, 2021 at 04:04:25PM +0300, Lisovskiy, Stanislav wrote:
>> >> On Tue, Sep 14, 2021 at 03:04:11PM +0300, Jani Nikula wrote:
>> >> > On Tue, 14 Sep 2021, "Lisovskiy, Stanislav"
>>  wrote:
>> >> > > On Tue, Sep 14, 2021 at 10:48:46AM +0300, Ville Syrjälä wrote:
>> >> > >> On Tue, Sep 14, 2021 at 07:31:46AM +, Kulkarni, Vandita wrote:
>> >> > >> > > -Original Message-
>> >> > >> > > From: Ville Syrjälä 
>> >> > >> > > Sent: Tuesday, September 14, 2021 12:59 PM
>> >> > >> > > To: Kulkarni, Vandita 
>> >> > >> > > Cc: intel-gfx@lists.freedesktop.org; Nikula, Jani
>> >> > >> > > ; Navare, Manasi D
>> >> > >> > > 
>> >> > >> > > Subject: Re: [Intel-gfx] [PATCH] drm/i915/display: Enable
>> >> > >> > > second VDSC engine for higher moderates
>> >> > >> > >
>> >> > >> > > On Mon, Sep 13, 2021 at 08:09:23PM +0530, Vandita Kulkarni
>> wrote:
>> >> > >> > > > Each VDSC operates with 1ppc throughput, hence enable the
>> >> > >> > > > second VDSC engine when moderate is higher than the current
>> cdclk.
>> >> > >> > > >
>> >> > >> > > > Signed-off-by: Vandita Kulkarni
>> >> > >> > > > 
>> >> > >> > > > ---
>> >> > >> > > >  drivers/gpu/drm/i915/display/intel_dp.c | 12 ++--
>> >> > >> > > >  1 file changed, 10 insertions(+), 2 deletions(-)
>> >> > >> > > >
>> >> > >> > > > diff --git a/drivers/gpu/drm/i915/display/intel_dp.c
>> >> > >> > > > b/drivers/gpu/drm/i915/display/intel_dp.c
>> >> > >> > > > index 161c33b2c869..55878f65f724 100644
>> >> > >> > > > --- a/drivers/gpu/drm/i915/display/intel_dp.c
>> >> > >> > > > +++ b/drivers/gpu/drm/i915/display/intel_dp.c
>> >> > >> > > > @@ -70,6 +70,7 @@
>> >> > >> > > >  #include "intel_tc.h"
>> >> > >> > > >  #include "intel_vdsc.h"
>> >> > >> > > >  #include "intel_vrr.h"
>> >> > >> > > > +#include "intel_cdclk.h"
>> >> > >> > > >
>> >> > >> > > >  #define DP_DPRX_ESI_LEN 14
>> >> > >> > > >
>> >> > >> > > > @@ -1291,10 +1292,13 @@ static int
>> >> > >> > > > intel_dp_dsc_compute_config(struct
>> >> > >> > > intel_dp *intel_dp,
>> >> > >> > > >   struct 
>> >> > >> > > > drm_connector_state
>> *conn_state,
>> >> > >> > > >   struct 
>> >> > >> > > > link_config_limits *limits)  {
>> >> > >> > > > +  struct intel_cdclk_state *cdclk_state;
>> >> > >> > > >struct intel_digital_port *dig_port =
>> dp_to_dig_port(intel_dp);
>> >> > >> > > >struct drm_i915_private *dev_priv = to_i915(dig_port-
>> >> > >> > > >base.base.dev);
>> >> > >> > > >const struct drm_display_mode *adjusted_mode =
>> >> > >> > > >&pipe_config->hw.adjusted_mode;
>> >> > >> > > > +  struct intel_atomic_state *state =
>> >> > >> > > > +  
>> >> > >> > > > to_intel_atomic_state(pipe_config-
>> >> > >> > > >uapi.state);
>> >> > >> > > >int pipe_bpp;
>> >> > >> > > >int ret;
>> >> > >> > > >
>> >> > >> > > > @@ -1373,12 +1377,16 @@ static int
>> >> > >> > > > intel_dp_dsc_compute_config(struct
>> >> > >> > > intel_dp *intel_dp,
>> >> > >> > > >}
>> >> > >> > > >}
>> >> > >> > > >
>> >> > >> > > > +  cdclk_state = intel_atomic_get_cdclk_state(state);
>> >> > >> > > > +  if (IS_ERR(cdclk_state))
>> >> > >> > > > +  return PTR_ERR(cdclk_state);
>> >> > >> > > > +
>> >> > >> > > >/*
>> >> > >> > > > * VDSC engine operates at 1 Pixel per clock, so if 
>> >> > >> > > > peak pixel
>> rate
>> >> > >> > > > -   * is greater than the maximum Cdclock and if slice 
>> >> > >> > > > count is
>> even
>> >> > >> > > > +   * is greater than the current Cdclock and if slice
>> >> > >> > > > +count is even
>> >> > >> > > > * then we need to use 2 VDSC instances.
>> >> > >> > > > */
>> >> > >> > > > -  if (adjusted_mode->crtc_clock > dev_priv->max_cdclk_freq
>> ||
>> >> > >> > > > +  if (adjusted_mode->crtc_clock >
>> >> > >> > > > +cdclk_state->actual.cdclk ||
>> >> > >> > >
>> >> > >> > > This is wrong. We compute the cdclk based on the
>> >> > >> > > requirements of the mode/etc., not the other way around.
>> >> > >
>> >> > > According to BSpec guideline, we decide whether we enable or
>> >> > > disable second VDSC engine, based on that condition. As I
>> >> > > understand that one is about DSC config calculation, based on CDCLK
>> which was calculated.
>> >> >
>> >> > Point is, at the time compute_config gets called, what guarantees
>> >> > are there that cdclk_state->actual.cdclk contains anything useful?
>>

Re: [Intel-gfx] [RFC PATCH] drm/ttm: Add a private member to the struct ttm_resource

2021-09-14 Thread Thomas Hellström



On 9/14/21 4:07 PM, Daniel Vetter wrote:

On Tue, Sep 14, 2021 at 12:38:00PM +0200, Thomas Hellström wrote:

On Tue, 2021-09-14 at 10:53 +0200, Christian König wrote:

Am 14.09.21 um 10:27 schrieb Thomas Hellström:

On Tue, 2021-09-14 at 09:40 +0200, Christian König wrote:

Am 13.09.21 um 14:41 schrieb Thomas Hellström:

[SNIP]

Let's say you have a struct ttm_object_vram and a struct
ttm_object_gtt, both subclassing drm_gem_object. Then I'd
say
a
driver would want to subclass those to attach identical
data,
extend functionality and provide a single i915_gem_object
to
the
rest of the driver, which couldn't care less whether it's
vram or
gtt? Wouldn't you say having separate struct
ttm_object_vram
and a
struct ttm_object_gtt in this case would be awkward?. We
*want* to
allow common handling.

Yeah, but that's a bad idea. This is like diamond
inheritance
in C++.

When you need the same functionality in different backends
you
implement that as separate object and then add a parent
class.


It's the exact same situation here. With struct
ttm_resource
you
let *different* implementation flavours subclass it,
which
makes it
awkward for the driver to extend the functionality in a
common way
by subclassing, unless the driver only uses a single
implementation.

Well the driver should use separate implementations for
their
different domains as much as possible.


Hmm, Now you lost me a bit. Are you saying that the way we do
dynamic
backends in the struct ttm_buffer_object to facilitate driver
subclassing is a bad idea or that the RFC with backpointer is
a
bad
idea?



Or if you mean diamond inheritance is bad, yes that's basically
my
point.

That diamond inheritance is a bad idea. What I don't understand
is
why
you need that in the first place?

Information that you attach to a resource are specific to the
domain
where the resource is allocated from. So why do you want to
attach
the
same information to a resources from different domains?

Again, for the same reason that we do that with struct
i915_gem_objects
and struct ttm_tts, to extend the functionality. I mean information
that we attach when we subclass a struct ttm_buffer_object doesn't
necessarily care about whether it's a VRAM or a GTT object. In
exactly
the same way, information that we want to attach to a struct
ttm_resource doesn't necessarily care whether it's a system or a
VRAM
resource, and need not be specific to any of those.

In this particular case, as memory management becomes asynchronous,
you
can't attach things like sg-tables and gpu binding information to
the
gem object anymore, because the object may have a number of
migrations
in the pipeline. Such things need to be attached to the structure
that
abstracts the memory allocation, and which may have a completely
different lifetime than the object itself.

In our particular case we want to attach information for cached
page
lookup and an sg-table, and moving forward probably the gpu
binding
(vma) information, and that is the same information for any
ttm_resource regardless where it's allocated from.

Typical example: A pipelined GPU operation happening before an
async
eviction goes wrong. We need to error capture and reset. But if we
look
at the object for error capturing, it's already updated pointing to
an
after-eviction resource, and the resource sits on a ghost object
(or in
the future when ghost objects go away perhaps in limbo somewhere).

We need to capture the memory pointed to by the struct ttm_resource
the
GPU was referencing, and to be able to do that we need to cache
driver-
specific info on the resource. Typically an sg-list and GPU binding
information.

Anyway, that cached information needs to be destroyed together with
the
resource and thus we need to be able to access that information
from
the resource in some way, regardless whether it's a pointer or
whether
we embed the struct resource.

I think it's pretty important here that we (using the inheritance
diagram below) recognize the need for D to inherit from A, just
like we
do for objects or ttm_tts.



Looking at
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fen.wikipedia.org%2Fwiki%2FMultiple_inheritance%23%2Fmedia%2FFile%3ADiamond_inheritance.svg&data=04%7C01%7Cchristian.koenig%40amd.com%7C268bb562db8548b285b408d977598b2c%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637672048739103176%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=bPyDqiSF%2FHFZbl74ux0vfwh3uma5hZIUf2xbzb9yZz8%3D&reserved=0
   



1)

A would be the struct ttm_resource itself,
D would be struct i915_resource,
B would be struct ttm_range_mgr_node,
C would be struct i915_ttm_buddy_resource

And we need to resolve the ambiguity using the awkward union
construct, iff we need to derive from both B and C.

Struct ttm_buffer_object and struct ttm_tt instead have B) and
C)
being dynamic backends of A) or a single type derived from A)
Hence
the problem doesn't exist for these types.

So the 

Re: [Intel-gfx] [PATCH 1/5] drm/i915: Do not define vma on stack

2021-09-14 Thread Matthew Brost
On Tue, Sep 14, 2021 at 03:04:59PM +1000, Dave Airlie wrote:
> On Tue, 14 Sept 2021 at 14:55, Matthew Brost  wrote:
> >
> > From: Venkata Sandeep Dhanalakota 
> >
> > Defining vma on stack can cause stack overflow, if
> > vma gets populated with new fields.
> 
> Is there some higher level locking stopping that from getting trashed?
> or a guarantee that uc_fw_bind_ggtt is only entered by one thread at a
> time?
> 

I believe this function is only called during driver load (inherently
one thread) or during a GT reset (protected by reset mutex) so at most 1
thread can be executing this code at once, thus it is safe to use a
global dummy vma in this function.

Matt

> Dave.
> 
> >
> > Cc: Daniele Ceraolo Spurio 
> > Cc: Tvrtko Ursulin 
> > Signed-off-by: Venkata Sandeep Dhanalakota 
> > Signed-off-by: Matthew Brost 
> > Reviewed-by: Matthew Brost 
> > ---
> >  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 18 +-
> >  drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h |  2 ++
> >  2 files changed, 11 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> > index 3a16d08608a5..f632dbd32b42 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
> > @@ -413,20 +413,20 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw)
> >  {
> > struct drm_i915_gem_object *obj = uc_fw->obj;
> > struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
> > -   struct i915_vma dummy = {
> > -   .node.start = uc_fw_ggtt_offset(uc_fw),
> > -   .node.size = obj->base.size,
> > -   .pages = obj->mm.pages,
> > -   .vm = &ggtt->vm,
> > -   };
> > +   struct i915_vma *dummy = &uc_fw->dummy;
> > +
> > +   dummy->node.start = uc_fw_ggtt_offset(uc_fw);
> > +   dummy->node.size = obj->base.size;
> > +   dummy->pages = obj->mm.pages;
> > +   dummy->vm = &ggtt->vm;
> >
> > GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> > -   GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
> > +   GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
> >
> > /* uc_fw->obj cache domains were not controlled across suspend */
> > -   drm_clflush_sg(dummy.pages);
> > +   drm_clflush_sg(dummy->pages);
> >
> > -   ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
> > +   ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0);
> >  }
> >
> >  static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h 
> > b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> > index 99bb1fe1af66..693cc0ebcd63 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
> > @@ -10,6 +10,7 @@
> >  #include "intel_uc_fw_abi.h"
> >  #include "intel_device_info.h"
> >  #include "i915_gem.h"
> > +#include "i915_vma.h"
> >
> >  struct drm_printer;
> >  struct drm_i915_private;
> > @@ -75,6 +76,7 @@ struct intel_uc_fw {
> > bool user_overridden;
> > size_t size;
> > struct drm_i915_gem_object *obj;
> > +   struct i915_vma dummy;
> >
> > /*
> >  * The firmware build process will generate a version header file 
> > with major and
> > --
> > 2.32.0
> >


Re: [Intel-gfx] [PATCH v2 1/9] vfio/ccw: Use functions for alloc/free of the vfio_ccw_private

2021-09-14 Thread Cornelia Huck
On Fri, Sep 10 2021, Christoph Hellwig  wrote:

> On Thu, Sep 09, 2021 at 04:38:41PM -0300, Jason Gunthorpe wrote:
>> +
>> +private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA);
>> +if (!private)
>> +return ERR_PTR(-ENOMEM);
>
> Nit: there is no need to add GFP_KERNEL when using GFP_DMA.
>
> Also a question to the s390 maintainers: why do we need 31-bit
> addressability for the main private data structure?

I don't think we need it anymore since c98e16b2fa12 ("s390/cio: Convert
ccw_io_region to pointer") and probably should just drop the GFP_DMA.



[Intel-gfx] ✗ Fi.CI.SPARSE: warning for Enable GuC submission by default on DG1 (rev6)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Enable GuC submission by default on DG1 (rev6)
URL   : https://patchwork.freedesktop.org/series/93325/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
-
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_reset.c:1392:5: warning: context imbalance in 
'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/i915_perf.c:1442:15: warning: memset with byte count of 
16777216
+drivers/gpu/drm/i915/i915_perf.c:1496:15: warning: memset with byte count of 
16777216
+./include/asm-generic/bitops/find.h:112:45: warning: shift count is negative 
(-262080)
+./include/asm-generic/bitops/find.h:32:31: warning: shift count is negative 
(-262080)
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read32' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read64' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read8' - 
different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write32' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write8' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen8_write16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen8_write32' 
- different

[Intel-gfx] ✗ Fi.CI.BAT: failure for Enable GuC submission by default on DG1 (rev6)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Enable GuC submission by default on DG1 (rev6)
URL   : https://patchwork.freedesktop.org/series/93325/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10583 -> Patchwork_21042


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_21042 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21042, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_21042:

### IGT changes ###

 Possible regressions 

  * igt@core_hotunplug@unbind-rebind:
- fi-rkl-guc: [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-rkl-guc/igt@core_hotunp...@unbind-rebind.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-rkl-guc/igt@core_hotunp...@unbind-rebind.html
- fi-kbl-7500u:   [PASS][3] -> [INCOMPLETE][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-kbl-7500u/igt@core_hotunp...@unbind-rebind.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-kbl-7500u/igt@core_hotunp...@unbind-rebind.html
- fi-cfl-8109u:   [PASS][5] -> [INCOMPLETE][6]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html

  * igt@i915_module_load@reload:
- fi-skl-6700k2:  NOTRUN -> [INCOMPLETE][7]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-skl-6700k2/igt@i915_module_l...@reload.html
- fi-icl-y:   [PASS][8] -> [INCOMPLETE][9]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-icl-y/igt@i915_module_l...@reload.html
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-icl-y/igt@i915_module_l...@reload.html

  * igt@i915_selftest@live@mman:
- fi-rkl-11600:   NOTRUN -> [INCOMPLETE][10]
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-rkl-11600/igt@i915_selftest@l...@mman.html
- fi-cfl-8700k:   NOTRUN -> [INCOMPLETE][11]
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-cfl-8700k/igt@i915_selftest@l...@mman.html
- fi-icl-u2:  NOTRUN -> [INCOMPLETE][12]
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-icl-u2/igt@i915_selftest@l...@mman.html

  * igt@i915_selftest@live@requests:
- fi-pnv-d510:[PASS][13] -> [DMESG-FAIL][14]
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-pnv-d510/igt@i915_selftest@l...@requests.html
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-pnv-d510/igt@i915_selftest@l...@requests.html

  
Known issues


  Here are the changes found in Patchwork_21042 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@i915_selftest@live@mman:
- fi-skl-guc: NOTRUN -> [INCOMPLETE][15] ([i915#3796])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-skl-guc/igt@i915_selftest@l...@mman.html

  * igt@runner@aborted:
- fi-cfl-8700k:   NOTRUN -> [FAIL][16] ([i915#2426] / [i915#3363])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-cfl-8700k/igt@run...@aborted.html
- fi-icl-u2:  NOTRUN -> [FAIL][17] ([i915#2426] / [i915#3363] / 
[i915#3690])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-icl-u2/igt@run...@aborted.html
- fi-skl-guc: NOTRUN -> [FAIL][18] ([i915#1436] / [i915#2426] / 
[i915#3363])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-skl-guc/igt@run...@aborted.html
- fi-skl-6700k2:  NOTRUN -> [FAIL][19] ([i915#2426] / [i915#3363])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-skl-6700k2/igt@run...@aborted.html

  
 Possible fixes 

  * igt@core_hotunplug@unbind-rebind:
- fi-skl-6700k2:  [INCOMPLETE][20] -> [PASS][21]
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-skl-6700k2/igt@core_hotunp...@unbind-rebind.html
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-skl-6700k2/igt@core_hotunp...@unbind-rebind.html
- fi-cfl-8700k:   [INCOMPLETE][22] -> [PASS][23]
   [22]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-cfl-8700k/igt@core_hotunp...@unbind-rebind.html
   [23]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21042/fi-cfl-8700k/igt@core_hotunp...@unbind-rebind.html
- fi-icl-u2:  [INCOMPLETE][24] -> [PASS][25]
   [24]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI

[Intel-gfx] ✗ Fi.CI.SPARSE: warning for Do error capture async, flush G2H processing on reset (rev3)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Do error capture async, flush G2H processing on reset (rev3)
URL   : https://patchwork.freedesktop.org/series/94642/
State : warning

== Summary ==

$ dim sparse --fast origin/drm-tip
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.
-
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:27:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:32:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:49:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_engine_stats.h:56:9: warning: trying to copy 
expression type 31
+drivers/gpu/drm/i915/gt/intel_reset.c:1392:5: warning: context imbalance in 
'intel_gt_reset_trylock' - different lock contexts for basic block
+drivers/gpu/drm/i915/i915_perf.c:1442:15: warning: memset with byte count of 
16777216
+drivers/gpu/drm/i915/i915_perf.c:1496:15: warning: memset with byte count of 
16777216
+./include/asm-generic/bitops/find.h:112:45: warning: shift count is negative 
(-262080)
+./include/asm-generic/bitops/find.h:32:31: warning: shift count is negative 
(-262080)
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read64' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_read8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen11_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write16' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write32' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 
'gen12_fwtable_write8' - different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read32' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read64' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_read8' - 
different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write32' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen6_write8' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen8_write16' 
- different lock contexts for basic block
+./include/linux/spinlock.h:418:9: warning: context imbalance in 'gen8_write32

[Intel-gfx] ✗ Fi.CI.BAT: failure for Do error capture async, flush G2H processing on reset (rev3)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Do error capture async, flush G2H processing on reset (rev3)
URL   : https://patchwork.freedesktop.org/series/94642/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_10583 -> Patchwork_21043


Summary
---

  **FAILURE**

  Serious unknown changes coming with Patchwork_21043 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_21043, please notify your bug team to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/index.html

Possible new issues
---

  Here are the unknown changes that may have been introduced in Patchwork_21043:

### IGT changes ###

 Possible regressions 

  * igt@core_hotunplug@unbind-rebind:
- fi-rkl-guc: [PASS][1] -> [INCOMPLETE][2]
   [1]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-rkl-guc/igt@core_hotunp...@unbind-rebind.html
   [2]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-rkl-guc/igt@core_hotunp...@unbind-rebind.html
- fi-kbl-7500u:   [PASS][3] -> [INCOMPLETE][4]
   [3]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-kbl-7500u/igt@core_hotunp...@unbind-rebind.html
   [4]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-7500u/igt@core_hotunp...@unbind-rebind.html
- fi-cfl-8109u:   [PASS][5] -> [INCOMPLETE][6]
   [5]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html
   [6]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-cfl-8109u/igt@core_hotunp...@unbind-rebind.html

  * igt@i915_module_load@reload:
- fi-icl-u2:  NOTRUN -> [INCOMPLETE][7]
   [7]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-icl-u2/igt@i915_module_l...@reload.html
- fi-kbl-soraka:  NOTRUN -> [INCOMPLETE][8]
   [8]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-soraka/igt@i915_module_l...@reload.html
- fi-icl-y:   [PASS][9] -> [INCOMPLETE][10]
   [9]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-icl-y/igt@i915_module_l...@reload.html
   [10]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-icl-y/igt@i915_module_l...@reload.html

  
Known issues


  Here are the changes found in Patchwork_21043 that come from known issues:

### IGT changes ###

 Issues hit 

  * igt@gem_exec_fence@basic-busy@bcs0:
- fi-kbl-soraka:  NOTRUN -> [SKIP][11] ([fdo#109271]) +8 similar issues
   [11]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-soraka/igt@gem_exec_fence@basic-b...@bcs0.html

  * igt@gem_exec_suspend@basic-s3:
- fi-tgl-1115g4:  [PASS][12] -> [FAIL][13] ([i915#1888]) +1 similar 
issue
   [12]: 
https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_10583/fi-tgl-1115g4/igt@gem_exec_susp...@basic-s3.html
   [13]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-tgl-1115g4/igt@gem_exec_susp...@basic-s3.html

  * igt@gem_huc_copy@huc-copy:
- fi-kbl-soraka:  NOTRUN -> [SKIP][14] ([fdo#109271] / [i915#2190])
   [14]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-soraka/igt@gem_huc_c...@huc-copy.html

  * igt@i915_selftest@live@mman:
- fi-cfl-guc: NOTRUN -> [INCOMPLETE][15] ([i915#4129])
   [15]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-cfl-guc/igt@i915_selftest@l...@mman.html
- fi-skl-guc: NOTRUN -> [INCOMPLETE][16] ([i915#3796] / [i915#4129])
   [16]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-skl-guc/igt@i915_selftest@l...@mman.html
- fi-rkl-11600:   NOTRUN -> [INCOMPLETE][17] ([i915#4129])
   [17]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-rkl-11600/igt@i915_selftest@l...@mman.html
- fi-skl-6700k2:  NOTRUN -> [INCOMPLETE][18] ([i915#3796] / [i915#4129])
   [18]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-skl-6700k2/igt@i915_selftest@l...@mman.html
- fi-kbl-7567u:   NOTRUN -> [INCOMPLETE][19] ([i915#4129])
   [19]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-7567u/igt@i915_selftest@l...@mman.html

  * igt@kms_chamelium@common-hpd-after-suspend:
- fi-kbl-soraka:  NOTRUN -> [SKIP][20] ([fdo#109271] / [fdo#111827]) +8 
similar issues
   [20]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-kbl-soraka/igt@kms_chamel...@common-hpd-after-suspend.html

  * igt@kms_flip@basic-flip-vs-modeset:
- fi-rkl-11600:   NOTRUN -> [SKIP][21] ([i915#3669])
   [21]: 
https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_21043/fi-rkl-11600/igt@kms_f...@basic-flip-vs-modeset.html

  * igt@kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d:
- fi-kbl-soraka:  NOTRUN -> [SKIP][22] ([fdo#109271] / [i915#533])
   [22]: 
https

Re: [Intel-gfx] [PATCH 0/3] drm/i915: Enable -Wsometimes-uninitialized

2021-09-14 Thread Jani Nikula
On Mon, 13 Sep 2021, Nathan Chancellor  wrote:
> On Tue, Aug 24, 2021 at 03:54:24PM -0700, Nathan Chancellor wrote:
>> Commit 46e2068081e9 ("drm/i915: Disable some extra clang warnings")
>> disabled -Wsometimes-uninitialized as noisy but there have been a few
>> fixes to clang that make the false positive rate fairly low so it should
>> be enabled to help catch obvious mistakes. The first two patches fix
>> recent instances of this warning then enables it for i915 like the rest
>> of the tree.
>> 
>> Cheers,
>> Nathan
>> 
>> Nathan Chancellor (3):
>>   drm/i915/selftests: Do not use import_obj uninitialized
>>   drm/i915/selftests: Always initialize err in
>> igt_dmabuf_import_same_driver_lmem()
>>   drm/i915: Enable -Wsometimes-uninitialized
>> 
>>  drivers/gpu/drm/i915/Makefile| 1 -
>>  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 7 ---
>>  2 files changed, 4 insertions(+), 4 deletions(-)
>> 
>> 
>> base-commit: fb43ebc83e069625cfeeb2490efc3ffa0013bfa4
>> -- 
>> 2.33.0
>> 
>> 
>
> Ping, could this be picked up for an -rc as these are very clearly bugs?

Thanks for the patches and review. Pushed to drm-intel-gt-next and
cherry-picked to drm-intel-fixes, headed to -rc2.

BR,
Jani.


-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH 05/25] drm/i915/wm: provide wrappers around watermark vfuncs calls (v2)

2021-09-14 Thread Jani Nikula
On Fri, 10 Sep 2021, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This moves one wrapper from the pm->display side, and creates
> wrappers for all the others, this should simplify things later.
>
> One thing to note is that the code checks the existence of some
> of these ptrs, so the wrappers are a bit complicated by that.
>
> Suggested by Jani.
>
> v2: fixup warnings in wrong place error.

Reviewed-by: Jani Nikula 


>
> Signed-off-by: Dave Airlie 
> ---
>  drivers/gpu/drm/i915/display/intel_display.c | 187 ---
>  drivers/gpu/drm/i915/intel_pm.c  |  39 
>  drivers/gpu/drm/i915/intel_pm.h  |   1 -
>  3 files changed, 123 insertions(+), 104 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
> b/drivers/gpu/drm/i915/display/intel_display.c
> index e62f8317cbda..a1380ce02861 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -126,6 +126,101 @@ static void ilk_pfit_enable(const struct 
> intel_crtc_state *crtc_state);
>  static void intel_modeset_setup_hw_state(struct drm_device *dev,
>struct drm_modeset_acquire_ctx *ctx);
>  
> +
> +/**
> + * intel_update_watermarks - update FIFO watermark values based on current 
> modes
> + * @dev_priv: i915 device
> + *
> + * Calculate watermark values for the various WM regs based on current mode
> + * and plane configuration.
> + *
> + * There are several cases to deal with here:
> + *   - normal (i.e. non-self-refresh)
> + *   - self-refresh (SR) mode
> + *   - lines are large relative to FIFO size (buffer can hold up to 2)
> + *   - lines are small relative to FIFO size (buffer can hold more than 2
> + * lines), so need to account for TLB latency
> + *
> + *   The normal calculation is:
> + * watermark = dotclock * bytes per pixel * latency
> + *   where latency is platform & configuration dependent (we assume pessimal
> + *   values here).
> + *
> + *   The SR calculation is:
> + * watermark = (trunc(latency/line time)+1) * surface width *
> + *   bytes per pixel
> + *   where
> + * line time = htotal / dotclock
> + * surface width = hdisplay for normal plane and 64 for cursor
> + *   and latency is assumed to be high, as above.
> + *
> + * The final value programmed to the register should always be rounded up,
> + * and include an extra 2 entries to account for clock crossings.
> + *
> + * We don't use the sprite, so we can ignore that.  And on Crestline we have
> + * to set the non-SR watermarks to 8.
> + */
> +static void intel_update_watermarks(struct drm_i915_private *dev_priv)
> +{
> + if (dev_priv->display.update_wm)
> + dev_priv->display.update_wm(dev_priv);
> +}
> +
> +static int intel_compute_pipe_wm(struct intel_atomic_state *state,
> +  struct intel_crtc *crtc)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (dev_priv->display.compute_pipe_wm)
> + return dev_priv->display.compute_pipe_wm(state, crtc);
> + return 0;
> +}
> +
> +static int intel_compute_intermediate_wm(struct intel_atomic_state *state,
> +  struct intel_crtc *crtc)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (!dev_priv->display.compute_intermediate_wm)
> + return 0;
> + if (drm_WARN_ON(&dev_priv->drm,
> + !dev_priv->display.compute_pipe_wm))
> + return 0;
> + return dev_priv->display.compute_intermediate_wm(state, crtc);
> +}
> +
> +static bool intel_initial_watermarks(struct intel_atomic_state *state,
> +  struct intel_crtc *crtc)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (dev_priv->display.initial_watermarks) {
> + dev_priv->display.initial_watermarks(state, crtc);
> + return true;
> + }
> + return false;
> +}
> +
> +static void intel_atomic_update_watermarks(struct intel_atomic_state *state,
> +struct intel_crtc *crtc)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (dev_priv->display.atomic_update_watermarks)
> + dev_priv->display.atomic_update_watermarks(state, crtc);
> +}
> +
> +static void intel_optimize_watermarks(struct intel_atomic_state *state,
> +   struct intel_crtc *crtc)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (dev_priv->display.optimize_watermarks)
> + dev_priv->display.optimize_watermarks(state, crtc);
> +}
> +
> +static void intel_compute_global_watermarks(struct intel_atomic_state *state)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + if (dev_priv->display.compute_global_watermarks)
> + dev_priv->display.compute_global_wat

Re: [Intel-gfx] [PATCH 06/25] drm/i915: add wrappers around cdclk vtable funcs.

2021-09-14 Thread Jani Nikula
On Fri, 10 Sep 2021, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This adds wrappers around all the vtable callers so they are in
> one place.
>
> Suggested by Jani.
>
> Signed-off-by: Dave Airlie 

Reviewed-by: Jani Nikula 


> ---
>  drivers/gpu/drm/i915/display/intel_cdclk.c| 47 +++
>  drivers/gpu/drm/i915/display/intel_cdclk.h|  4 +-
>  drivers/gpu/drm/i915/display/intel_display.c  |  2 +-
>  .../drm/i915/display/intel_display_power.c|  2 +-
>  4 files changed, 44 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
> b/drivers/gpu/drm/i915/display/intel_cdclk.c
> index 9aec17b33819..0e09f259914f 100644
> --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> @@ -59,6 +59,37 @@
>   * dividers can be programmed correctly.
>   */
>  
> +void intel_cdclk_get_cdclk(struct drm_i915_private *dev_priv,
> +struct intel_cdclk_config *cdclk_config)
> +{
> + dev_priv->display.get_cdclk(dev_priv, cdclk_config);
> +}
> +
> +int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state)
> +{
> + struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> + return dev_priv->display.bw_calc_min_cdclk(state);
> +}
> +
> +static void intel_cdclk_set_cdclk(struct drm_i915_private *dev_priv,
> +   const struct intel_cdclk_config *cdclk_config,
> +   enum pipe pipe)
> +{
> + dev_priv->display.set_cdclk(dev_priv, cdclk_config, pipe);
> +}
> +
> +static int intel_cdclk_modeset_calc_cdclk(struct drm_i915_private *dev_priv,
> +   struct intel_cdclk_state 
> *cdclk_config)
> +{
> + return dev_priv->display.modeset_calc_cdclk(cdclk_config);
> +}
> +
> +static u8 intel_cdclk_calc_voltage_level(struct drm_i915_private *dev_priv,
> +  int cdclk)
> +{
> + return dev_priv->display.calc_voltage_level(cdclk);
> +}
> +
>  static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv,
>  struct intel_cdclk_config *cdclk_config)
>  {
> @@ -1466,7 +1497,7 @@ static void bxt_get_cdclk(struct drm_i915_private 
> *dev_priv,
>* at least what the CDCLK frequency requires.
>*/
>   cdclk_config->voltage_level =
> - dev_priv->display.calc_voltage_level(cdclk_config->cdclk);
> + intel_cdclk_calc_voltage_level(dev_priv, cdclk_config->cdclk);
>  }
>  
>  static void bxt_de_pll_disable(struct drm_i915_private *dev_priv)
> @@ -1777,7 +1808,7 @@ static void bxt_cdclk_init_hw(struct drm_i915_private 
> *dev_priv)
>   cdclk_config.cdclk = bxt_calc_cdclk(dev_priv, 0);
>   cdclk_config.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_config.cdclk);
>   cdclk_config.voltage_level =
> - dev_priv->display.calc_voltage_level(cdclk_config.cdclk);
> + intel_cdclk_calc_voltage_level(dev_priv, cdclk_config.cdclk);
>  
>   bxt_set_cdclk(dev_priv, &cdclk_config, INVALID_PIPE);
>  }
> @@ -1789,7 +1820,7 @@ static void bxt_cdclk_uninit_hw(struct drm_i915_private 
> *dev_priv)
>   cdclk_config.cdclk = cdclk_config.bypass;
>   cdclk_config.vco = 0;
>   cdclk_config.voltage_level =
> - dev_priv->display.calc_voltage_level(cdclk_config.cdclk);
> + intel_cdclk_calc_voltage_level(dev_priv, cdclk_config.cdclk);
>  
>   bxt_set_cdclk(dev_priv, &cdclk_config, INVALID_PIPE);
>  }
> @@ -1956,7 +1987,7 @@ static void intel_set_cdclk(struct drm_i915_private 
> *dev_priv,
>&dev_priv->gmbus_mutex);
>   }
>  
> - dev_priv->display.set_cdclk(dev_priv, cdclk_config, pipe);
> + intel_cdclk_set_cdclk(dev_priv, cdclk_config, pipe);
>  
>   for_each_intel_dp(&dev_priv->drm, encoder) {
>   struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
> @@ -2424,7 +2455,7 @@ static int bxt_modeset_calc_cdclk(struct 
> intel_cdclk_state *cdclk_state)
>   cdclk_state->logical.cdclk = cdclk;
>   cdclk_state->logical.voltage_level =
>   max_t(int, min_voltage_level,
> -   dev_priv->display.calc_voltage_level(cdclk));
> +   intel_cdclk_calc_voltage_level(dev_priv, cdclk));
>  
>   if (!cdclk_state->active_pipes) {
>   cdclk = bxt_calc_cdclk(dev_priv, cdclk_state->force_min_cdclk);
> @@ -2433,7 +2464,7 @@ static int bxt_modeset_calc_cdclk(struct 
> intel_cdclk_state *cdclk_state)
>   cdclk_state->actual.vco = vco;
>   cdclk_state->actual.cdclk = cdclk;
>   cdclk_state->actual.voltage_level =
> - dev_priv->display.calc_voltage_level(cdclk);
> + intel_cdclk_calc_voltage_level(dev_priv, cdclk);
>   } else {
>   cdclk_state->actual = cdclk_state->logical;
>   }
> @@ -2525,7 +2556,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_stat

Re: [Intel-gfx] [PATCH 07/25] drm/i915/display: add intel_fdi_link_train wrapper.

2021-09-14 Thread Jani Nikula
On Fri, 10 Sep 2021, Dave Airlie  wrote:
> From: Dave Airlie 
>
> This wraps the fdi link training vfunc to make it clearer.
>
> Suggested by Jani.
>
> Signed-off-by: Dave Airlie 

Reviewed-by: Jani Nikula 


> ---
>  drivers/gpu/drm/i915/display/intel_display.c | 2 +-
>  drivers/gpu/drm/i915/display/intel_fdi.c | 8 
>  drivers/gpu/drm/i915/display/intel_fdi.h | 2 ++
>  3 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
> b/drivers/gpu/drm/i915/display/intel_display.c
> index 71518e71591b..aa174192c279 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -2156,7 +2156,7 @@ static void ilk_pch_enable(const struct 
> intel_atomic_state *state,
>   assert_pch_transcoder_disabled(dev_priv, pipe);
>  
>   /* For PCH output, training FDI link */
> - dev_priv->display.fdi_link_train(crtc, crtc_state);
> + intel_fdi_link_train(crtc, crtc_state);
>  
>   /* We need to program the right clock selection before writing the pixel
>* mutliplier into the DPLL. */
> diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c 
> b/drivers/gpu/drm/i915/display/intel_fdi.c
> index fc09b781f15f..339243399a65 100644
> --- a/drivers/gpu/drm/i915/display/intel_fdi.c
> +++ b/drivers/gpu/drm/i915/display/intel_fdi.c
> @@ -10,6 +10,14 @@
>  #include "intel_fdi.h"
>  #include "intel_sideband.h"
>  
> +void intel_fdi_link_train(struct intel_crtc *crtc,
> +   const struct intel_crtc_state *crtc_state)
> +{
> + struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +
> + dev_priv->display.fdi_link_train(crtc, crtc_state);
> +}
> +
>  /* units of 100MHz */
>  static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
>  {
> diff --git a/drivers/gpu/drm/i915/display/intel_fdi.h 
> b/drivers/gpu/drm/i915/display/intel_fdi.h
> index 60acf2133145..61cb216a09f5 100644
> --- a/drivers/gpu/drm/i915/display/intel_fdi.h
> +++ b/drivers/gpu/drm/i915/display/intel_fdi.h
> @@ -26,4 +26,6 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
>  void intel_fdi_pll_freq_update(struct drm_i915_private *i915);
>  void lpt_fdi_program_mphy(struct drm_i915_private *i915);
>  
> +void intel_fdi_link_train(struct intel_crtc *crtc,
> +   const struct intel_crtc_state *crtc_state);
>  #endif

-- 
Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH v2 2/7] drm/ttm: add TTM_PAGE_FLAG_SHMEM

2021-09-14 Thread Matthew Auld
On Tue, 14 Sept 2021 at 10:03, Christian König  wrote:
>
> Am 14.09.21 um 10:50 schrieb Matthew Auld:
> > Add new flag to indicate special shmem based tt, which can directly
> > handle swapping itself, and should be visible to some shrinker.
> >
> > As part of this we should skip the ttm_pages_allocated accounting, since
> > such tt objects should already be reachable, and potentially reclaimable
> > by some shrinker, if under memory pressure, and so shouldn't directly
> > count towards the swap "watermark" level.
> >
> > We also need to stop touching the page->mapping and page->index for such
> > objects, like in ttm_tt_add_mapping, since shmem already uses these.
> > Some drivers seems to depend on the tt mapping/index behaviour for their
> > own purposes, so directly using shmem tt likely won't be usable there
> > as-is.
> >
> > Signed-off-by: Matthew Auld 
> > Cc: Thomas Hellström 
> > Cc: Christian König 
> > ---
> >   drivers/gpu/drm/ttm/ttm_bo_vm.c |  4 ++--
> >   drivers/gpu/drm/ttm/ttm_tt.c| 10 +-
> >   include/drm/ttm/ttm_tt.h|  1 +
> >   3 files changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c 
> > b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > index f56be5bc0861..e2131c73dcb6 100644
> > --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
> > @@ -346,8 +346,8 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault 
> > *vmf,
> >   } else if (unlikely(!page)) {
> >   break;
> >   }
> > - page->index = drm_vma_node_start(&bo->base.vma_node) +
> > - page_offset;
> > + if (!(bo->ttm->page_flags & TTM_PAGE_FLAG_SHMEM))
> > + page->index = 
> > drm_vma_node_start(&bo->base.vma_node) + page_offset;
>
> I still have a rather bad feeling about that.
>
> This should either not be necessary any more in general or the shmemfile
> approach doesn't work correctly.
>
> Please send a patch to remove this for everybody instead and we will see
> if that really works or not.
>
> >   pfn = page_to_pfn(page);
> >   }
> >
> > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> > index dae52433beeb..cc4815c1f505 100644
> > --- a/drivers/gpu/drm/ttm/ttm_tt.c
> > +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> > @@ -293,7 +293,7 @@ static void ttm_tt_add_mapping(struct ttm_device *bdev, 
> > struct ttm_tt *ttm)
> >   {
> >   pgoff_t i;
> >
> > - if (ttm->page_flags & TTM_PAGE_FLAG_SG)
> > + if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))
>
> Maybe you should re-use the TTM_PAGE_FLAG_SG for this and/or rename the
> flag to better describe what it does.
>
> Something like TTM_PAGE_FLAG_EXTERNAL or similar? The only other use
> case for TTM_PAGE_FLAG_SG which comes to my mind is controlling if the
> pages array is allocated or not.

This seems slightly tricky. We still want ttm_bo_vm_reserve() to
behave normally when seeing a shmem_tt, and yet it still needs to return
SIGBUS or similar for FLAG_SG, as per the existing behaviour. Throwing in
a bo->type == type_sg check seems plausible, but at least amdgpu is
manually setting FLAG_SG for userptr objects, so I presume bo->type !=
type_sg there?

Otherwise maybe just s/SHMEM/EXTERNAL and leave FLAG_SG as-is?

>
> Christian.
>
> >   return;
> >
> >   for (i = 0; i < ttm->num_pages; ++i)
> > @@ -311,7 +311,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >   if (ttm_tt_is_populated(ttm))
> >   return 0;
> >
> > - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > + if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
> >   atomic_long_add(ttm->num_pages, &ttm_pages_allocated);
> >   if (bdev->pool.use_dma32)
> >   atomic_long_add(ttm->num_pages,
> > @@ -349,7 +349,7 @@ int ttm_tt_populate(struct ttm_device *bdev,
> >   return 0;
> >
> >   error:
> > - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > + if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))) {
> >   atomic_long_sub(ttm->num_pages, &ttm_pages_allocated);
> >   if (bdev->pool.use_dma32)
> >   atomic_long_sub(ttm->num_pages,
> > @@ -364,7 +364,7 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
> >   pgoff_t i;
> >   struct page **page = ttm->pages;
> >
> > - if (ttm->page_flags & TTM_PAGE_FLAG_SG)
> > + if (ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLAG_SHMEM))
> >   return;
> >
> >   for (i = 0; i < ttm->num_pages; ++i) {
> > @@ -384,7 +384,7 @@ void ttm_tt_unpopulate(struct ttm_device *bdev, struct 
> > ttm_tt *ttm)
> >   else
> >   ttm_pool_free(&bdev->pool, ttm);
> >
> > - if (!(ttm->page_flags & TTM_PAGE_FLAG_SG)) {
> > + if (!(ttm->page_flags & (TTM_PAGE_FLAG_SG | TTM_PAGE_FLA

Re: [Intel-gfx] [PATCH v2 0/9] Move vfio_ccw to the new mdev API

2021-09-14 Thread Jason Gunthorpe
On Mon, Sep 13, 2021 at 04:31:54PM -0400, Eric Farman wrote:
> > I rebased it and fixed it up here:
> > 
> > https://github.com/jgunthorpe/linux/tree/vfio_ccw
> > 
> > Can you try again?
> 
> That does address the crash, but then why is it processing a BROKEN
> event? Seems problematic. 

The stuff related to the NOT_OPER handling looked really wonky to me. I'm
guessing this is the issue — not sure about the pmcw.ena either.

diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index 5ea392959c0711..0d4d4f425befac 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -380,29 +380,19 @@ static void fsm_open(struct vfio_ccw_private *private,
spin_unlock_irq(sch->lock);
 }
 
-static void fsm_close(struct vfio_ccw_private *private,
- enum vfio_ccw_event event)
+static int flush_sch(struct vfio_ccw_private *private)
 {
struct subchannel *sch = private->sch;
DECLARE_COMPLETION_ONSTACK(completion);
int iretry, ret = 0;
 
-   spin_lock_irq(sch->lock);
-   if (!sch->schib.pmcw.ena)
-   goto err_unlock;
-   ret = cio_disable_subchannel(sch);
-   if (ret != -EBUSY)
-   goto err_unlock;
-
iretry = 255;
do {
-
ret = cio_cancel_halt_clear(sch, &iretry);
-
if (ret == -EIO) {
pr_err("vfio_ccw: could not quiesce subchannel 
0.%x.%04x!\n",
   sch->schid.ssid, sch->schid.sch_no);
-   break;
+   return ret;
}
 
/*
@@ -413,13 +403,28 @@ static void fsm_close(struct vfio_ccw_private *private,
spin_unlock_irq(sch->lock);
 
if (ret == -EBUSY)
-   wait_for_completion_timeout(&completion, 3*HZ);
+   wait_for_completion_timeout(&completion, 3 * HZ);
 
private->completion = NULL;
flush_workqueue(vfio_ccw_work_q);
spin_lock_irq(sch->lock);
ret = cio_disable_subchannel(sch);
} while (ret == -EBUSY);
+   return ret;
+}
+
+static void fsm_close(struct vfio_ccw_private *private,
+ enum vfio_ccw_event event)
+{
+   struct subchannel *sch = private->sch;
+   int ret;
+
+   spin_lock_irq(sch->lock);
+   if (!sch->schib.pmcw.ena)
+   goto err_unlock;
+   ret = cio_disable_subchannel(sch);
+   if (ret == -EBUSY)
+   ret = flush_sch(private);
if (ret)
goto err_unlock;
private->state = VFIO_CCW_STATE_CLOSED;


Re: [Intel-gfx] [PATCH 1/2] drm/i915/xehpsdv: Define MOCS table for XeHP SDV

2021-09-14 Thread Clint Taylor

Appears to match latest BSPEC

Reviewed-by: Clint Taylor 

-Clint


On 9/3/21 5:35 PM, Matt Roper wrote:

From: Lucas De Marchi 

Like DG1, XeHP SDV doesn't have LLC/eDRAM control values due to being a
dgfx card. XeHP SDV adds 2 more bits: L3_GLBGO to "push the Go point to
memory for L3 destined transaction" and L3_LKP to "enable Lookup for
uncacheable accesses".

Bspec: 45101
Cc: Daniele Ceraolo Spurio 
Signed-off-by: Lucas De Marchi 
Signed-off-by: Stuart Summers 
Signed-off-by: Matt Roper 
---
  drivers/gpu/drm/i915/gt/intel_mocs.c | 35 +++-
  1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c 
b/drivers/gpu/drm/i915/gt/intel_mocs.c
index e96afd7beb49..133cfe07cb9f 100644
--- a/drivers/gpu/drm/i915/gt/intel_mocs.c
+++ b/drivers/gpu/drm/i915/gt/intel_mocs.c
@@ -42,6 +42,8 @@ struct drm_i915_mocs_table {
  #define L3_ESC(value) ((value) << 0)
  #define L3_SCC(value) ((value) << 1)
  #define _L3_CACHEABILITY(value)   ((value) << 4)
+#define L3_GLBGO(value)((value) << 6)
+#define L3_LKUP(value) ((value) << 7)
  
  /* Helper defines */

  #define GEN9_NUM_MOCS_ENTRIES 64  /* 63-64 are reserved, but configured. */
@@ -315,6 +317,31 @@ static const struct drm_i915_mocs_entry dg1_mocs_table[] = 
{
MOCS_ENTRY(63, 0, L3_1_UC),
  };
  
+static const struct drm_i915_mocs_entry xehpsdv_mocs_table[] = {

+   /* wa_1608975824 */
+   MOCS_ENTRY(0, 0, L3_3_WB | L3_LKUP(1)),
+
+   /* UC - Coherent; GO:L3 */
+   MOCS_ENTRY(1, 0, L3_1_UC | L3_LKUP(1)),
+   /* UC - Coherent; GO:Memory */
+   MOCS_ENTRY(2, 0, L3_1_UC | L3_GLBGO(1) | L3_LKUP(1)),
+   /* UC - Non-Coherent; GO:Memory */
+   MOCS_ENTRY(3, 0, L3_1_UC | L3_GLBGO(1)),
+   /* UC - Non-Coherent; GO:L3 */
+   MOCS_ENTRY(4, 0, L3_1_UC),
+
+   /* WB */
+   MOCS_ENTRY(5, 0, L3_3_WB | L3_LKUP(1)),
+
+   /* HW Reserved - SW program but never use. */
+   MOCS_ENTRY(48, 0, L3_3_WB | L3_LKUP(1)),
+   MOCS_ENTRY(49, 0, L3_1_UC | L3_LKUP(1)),
+   MOCS_ENTRY(60, 0, L3_1_UC),
+   MOCS_ENTRY(61, 0, L3_1_UC),
+   MOCS_ENTRY(62, 0, L3_1_UC),
+   MOCS_ENTRY(63, 0, L3_1_UC),
+};
+
  enum {
HAS_GLOBAL_MOCS = BIT(0),
HAS_ENGINE_MOCS = BIT(1),
@@ -344,7 +371,13 @@ static unsigned int get_mocs_settings(const struct 
drm_i915_private *i915,
memset(table, 0, sizeof(struct drm_i915_mocs_table));
  
  	table->unused_entries_index = I915_MOCS_PTE;

-   if (IS_DG1(i915)) {
+   if (IS_XEHPSDV(i915)) {
+   table->size = ARRAY_SIZE(xehpsdv_mocs_table);
+   table->table = xehpsdv_mocs_table;
+   table->uc_index = 2;
+   table->n_entries = GEN9_NUM_MOCS_ENTRIES;
+   table->unused_entries_index = 5;
+   } else if (IS_DG1(i915)) {
table->size = ARRAY_SIZE(dg1_mocs_table);
table->table = dg1_mocs_table;
table->uc_index = 1;


Re: [Intel-gfx] [PATCH 08/27] drm/i915: Add logical engine mapping

2021-09-14 Thread Matthew Brost
On Tue, Sep 14, 2021 at 09:34:08AM +0100, Tvrtko Ursulin wrote:
> 
> On 13/09/2021 17:50, Matthew Brost wrote:
> > On Mon, Sep 13, 2021 at 10:24:43AM +0100, Tvrtko Ursulin wrote:
> > > 
> > > On 10/09/2021 20:49, Matthew Brost wrote:
> > > > On Fri, Sep 10, 2021 at 12:12:42PM +0100, Tvrtko Ursulin wrote:
> > > > > 
> > > > > On 20/08/2021 23:44, Matthew Brost wrote:
> > > > > > Add logical engine mapping. This is required for split-frame, as
> > > > > > workloads need to be placed on engines in a logically contiguous 
> > > > > > manner.
> > > > > > 
> > > > > > v2:
> > > > > > (Daniel Vetter)
> > > > > >  - Add kernel doc for new fields
> > > > > > 
> > > > > > Signed-off-by: Matthew Brost 
> > > > > > ---
> > > > > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 60 
> > > > > > ---
> > > > > > drivers/gpu/drm/i915/gt/intel_engine_types.h  |  5 ++
> > > > > > .../drm/i915/gt/intel_execlists_submission.c  |  1 +
> > > > > > drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c|  2 +-
> > > > > > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 21 +--
> > > > > > 5 files changed, 60 insertions(+), 29 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > > > > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > index 0d9105a31d84..4d790f9a65dd 100644
> > > > > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > > > > > @@ -290,7 +290,8 @@ static void nop_irq_handler(struct 
> > > > > > intel_engine_cs *engine, u16 iir)
> > > > > > GEM_DEBUG_WARN_ON(iir);
> > > > > > }
> > > > > > -static int intel_engine_setup(struct intel_gt *gt, enum 
> > > > > > intel_engine_id id)
> > > > > > +static int intel_engine_setup(struct intel_gt *gt, enum 
> > > > > > intel_engine_id id,
> > > > > > + u8 logical_instance)
> > > > > > {
> > > > > > const struct engine_info *info = &intel_engines[id];
> > > > > > struct drm_i915_private *i915 = gt->i915;
> > > > > > @@ -334,6 +335,7 @@ static int intel_engine_setup(struct intel_gt 
> > > > > > *gt, enum intel_engine_id id)
> > > > > > engine->class = info->class;
> > > > > > engine->instance = info->instance;
> > > > > > +   engine->logical_mask = BIT(logical_instance);
> > > > > > __sprint_engine_name(engine);
> > > > > > engine->props.heartbeat_interval_ms =
> > > > > > @@ -572,6 +574,37 @@ static intel_engine_mask_t 
> > > > > > init_engine_mask(struct intel_gt *gt)
> > > > > > return info->engine_mask;
> > > > > > }
> > > > > > +static void populate_logical_ids(struct intel_gt *gt, u8 
> > > > > > *logical_ids,
> > > > > > +u8 class, const u8 *map, u8 
> > > > > > num_instances)
> > > > > > +{
> > > > > > +   int i, j;
> > > > > > +   u8 current_logical_id = 0;
> > > > > > +
> > > > > > +   for (j = 0; j < num_instances; ++j) {
> > > > > > +   for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) {
> > > > > > +   if (!HAS_ENGINE(gt, i) ||
> > > > > > +   intel_engines[i].class != class)
> > > > > > +   continue;
> > > > > > +
> > > > > > +   if (intel_engines[i].instance == map[j]) {
> > > > > > +   logical_ids[intel_engines[i].instance] =
> > > > > > +   current_logical_id++;
> > > > > > +   break;
> > > > > > +   }
> > > > > > +   }
> > > > > > +   }
> > > > > > +}
> > > > > > +
> > > > > > +static void setup_logical_ids(struct intel_gt *gt, u8 
> > > > > > *logical_ids, u8 class)
> > > > > > +{
> > > > > > +   int i;
> > > > > > +   u8 map[MAX_ENGINE_INSTANCE + 1];
> > > > > > +
> > > > > > +   for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i)
> > > > > > +   map[i] = i;
> > > > > 
> > > > > What's the point of the map array since it is 1:1 with instance?
> > > > > 
> > > > 
> > > > Future products do not have a 1 to 1 mapping and that mapping can change
> > > > based on fusing, e.g. XeHP SDV.
> > > > 
> > > > Also technically ICL / TGL / ADL physical instance 2 maps to logical
> > > > instance 1.
> > > 
> > > I don't follow the argument. All I can see is that "map[i] = i" always in
> > > the proposed code, which is then used to check "instance == 
> > > map[instance]".
> > > So I'd suggest to remove this array from the code until there is a need 
> > > for
> > > it.
> > > 
> > 
> > Ok, this logic is slightly confusing and makes more sense once we have
> > non-standard mappings. Yes, map is setup in a 1 to 1 mapping by default
> > with the value in map[i] being a physical instance. Populate_logical_ids
> > searches the map finding all physical instances present in the map
> > assigning each found instance a new logical id increasing by 1 each
> > time.
> > 
> > e.g. If the map 

[Intel-gfx] ✗ Fi.CI.BUILD: failure for Move vfio_ccw to the new mdev API (rev3)

2021-09-14 Thread Patchwork
== Series Details ==

Series: Move vfio_ccw to the new mdev API (rev3)
URL   : https://patchwork.freedesktop.org/series/94520/
State : failure

== Summary ==

Patch is empty.
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".




Re: [Intel-gfx] [PATCH 09/25] drm/i915: split watermark vfuncs from display vtable.

2021-09-14 Thread Jani Nikula
On Fri, 10 Sep 2021, Dave Airlie  wrote:
> From: Dave Airlie 
>
> These are the watermark api between display and pm.
>
> Signed-off-by: Dave Airlie 

Reviewed-by: Jani Nikula 

> ---
>  drivers/gpu/drm/i915/display/intel_display.c | 35 -
>  drivers/gpu/drm/i915/i915_drv.h  | 24 
>  drivers/gpu/drm/i915/intel_pm.c  | 40 ++--
>  3 files changed, 54 insertions(+), 45 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
> b/drivers/gpu/drm/i915/display/intel_display.c
> index aa174192c279..706aa9d385bd 100644
> --- a/drivers/gpu/drm/i915/display/intel_display.c
> +++ b/drivers/gpu/drm/i915/display/intel_display.c
> @@ -162,16 +162,16 @@ static void intel_modeset_setup_hw_state(struct 
> drm_device *dev,
>   */
>  static void intel_update_watermarks(struct drm_i915_private *dev_priv)
>  {
> - if (dev_priv->display.update_wm)
> - dev_priv->display.update_wm(dev_priv);
> + if (dev_priv->wm_disp.update_wm)
> + dev_priv->wm_disp.update_wm(dev_priv);
>  }
>  
>  static int intel_compute_pipe_wm(struct intel_atomic_state *state,
>struct intel_crtc *crtc)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (dev_priv->display.compute_pipe_wm)
> - return dev_priv->display.compute_pipe_wm(state, crtc);
> + if (dev_priv->wm_disp.compute_pipe_wm)
> + return dev_priv->wm_disp.compute_pipe_wm(state, crtc);
>   return 0;
>  }
>  
> @@ -179,20 +179,20 @@ static int intel_compute_intermediate_wm(struct 
> intel_atomic_state *state,
>struct intel_crtc *crtc)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (!dev_priv->display.compute_intermediate_wm)
> + if (!dev_priv->wm_disp.compute_intermediate_wm)
>   return 0;
>   if (drm_WARN_ON(&dev_priv->drm,
> - !dev_priv->display.compute_pipe_wm))
> + !dev_priv->wm_disp.compute_pipe_wm))
>   return 0;
> - return dev_priv->display.compute_intermediate_wm(state, crtc);
> + return dev_priv->wm_disp.compute_intermediate_wm(state, crtc);
>  }
>  
>  static bool intel_initial_watermarks(struct intel_atomic_state *state,
>struct intel_crtc *crtc)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (dev_priv->display.initial_watermarks) {
> - dev_priv->display.initial_watermarks(state, crtc);
> + if (dev_priv->wm_disp.initial_watermarks) {
> + dev_priv->wm_disp.initial_watermarks(state, crtc);
>   return true;
>   }
>   return false;
> @@ -202,23 +202,23 @@ static void intel_atomic_update_watermarks(struct 
> intel_atomic_state *state,
>  struct intel_crtc *crtc)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (dev_priv->display.atomic_update_watermarks)
> - dev_priv->display.atomic_update_watermarks(state, crtc);
> + if (dev_priv->wm_disp.atomic_update_watermarks)
> + dev_priv->wm_disp.atomic_update_watermarks(state, crtc);
>  }
>  
>  static void intel_optimize_watermarks(struct intel_atomic_state *state,
> struct intel_crtc *crtc)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (dev_priv->display.optimize_watermarks)
> - dev_priv->display.optimize_watermarks(state, crtc);
> + if (dev_priv->wm_disp.optimize_watermarks)
> + dev_priv->wm_disp.optimize_watermarks(state, crtc);
>  }
>  
>  static void intel_compute_global_watermarks(struct intel_atomic_state *state)
>  {
>   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
> - if (dev_priv->display.compute_global_watermarks)
> - dev_priv->display.compute_global_watermarks(state);
> + if (dev_priv->wm_disp.compute_global_watermarks)
> + dev_priv->wm_disp.compute_global_watermarks(state);
>  }
>  
>  /* returns HPLL frequency in kHz */
> @@ -3669,6 +3669,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state 
> *state,
>  
>   if (!intel_initial_watermarks(state, crtc))
>   intel_update_watermarks(dev_priv);
> +
>   intel_enable_pipe(new_crtc_state);
>  
>   intel_crtc_vblank_on(new_crtc_state);
> @@ -3734,7 +3735,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state 
> *state,
>   if (DISPLAY_VER(dev_priv) != 2)
>   intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
>  
> - if (!dev_priv->display.initial_watermarks)
> + if (!dev_priv->wm_disp.initial_watermarks)
>   intel_update_watermarks(dev_priv);
>  
>   /* clock the pipe down to 640x480@60 to potentially save power */
> @@ -11387,7 +11388,7 @@ static void sanitize_waterm

[Intel-gfx] [PATCH 00/24] i915/display: split and constify vtable (v5)

2021-09-14 Thread Jani Nikula
This is Dave's series [1] with patch 2 (drm/i915/uncore: constify the
register vtables.) dropped because it conflicts between drm-intel-next
and drm-intel-gt-next. I want to get proper CI results on this before
merging. We can do the leftover patch afterwards. Everything else is
unmodified.

BR,
Jani.

[1] https://patchwork.freedesktop.org/series/94529/

Dave Airlie (24):
  drm/i915/uncore: split the fw get function into separate vfunc
  drm/i915/pm: drop get_fifo_size vfunc.
  drm/i915: make update_wm take a dev_priv.
  drm/i915/wm: provide wrappers around watermark vfuncs calls (v2)
  drm/i915: add wrappers around cdclk vtable funcs.
  drm/i915/display: add intel_fdi_link_train wrapper.
  drm/i915: split clock gating init from display vtable
  drm/i915: split watermark vfuncs from display vtable.
  drm/i915: split color functions from display vtable
  drm/i915: split audio functions from display vtable
  drm/i915: split cdclk functions from display vtable.
  drm/i915: split irq hotplug function from display vtable
  drm/i915: split fdi link training from display vtable.
  drm/i915: split the dpll clock compute out from display vtable.
  drm/i915: constify fdi link training vtable
  drm/i915: constify hotplug function vtable.
  drm/i915: constify color function vtable.
  drm/i915: constify the audio function vtable
  drm/i915: constify the dpll clock vtable
  drm/i915: constify the cdclk vtable
  drm/i915: drop unused function ptr and comments.
  drm/i915: constify display function vtable
  drm/i915: constify clock gating init vtable.
  drm/i915: constify display wm vtable

 drivers/gpu/drm/i915/display/intel_audio.c|  43 ++-
 drivers/gpu/drm/i915/display/intel_cdclk.c| 337 +-
 drivers/gpu/drm/i915/display/intel_cdclk.h|   4 +-
 drivers/gpu/drm/i915/display/intel_color.c| 138 ---
 drivers/gpu/drm/i915/display/intel_display.c  | 291 +--
 .../drm/i915/display/intel_display_power.c|   2 +-
 drivers/gpu/drm/i915/display/intel_dpll.c |  48 ++-
 drivers/gpu/drm/i915/display/intel_fdi.c  |  26 +-
 drivers/gpu/drm/i915/display/intel_fdi.h  |   2 +
 drivers/gpu/drm/i915/display/intel_hotplug.c  |   4 +-
 drivers/gpu/drm/i915/i915_drv.h   | 124 ---
 drivers/gpu/drm/i915/i915_irq.c   |  28 +-
 drivers/gpu/drm/i915/intel_pm.c   | 231 ++--
 drivers/gpu/drm/i915/intel_pm.h   |   1 -
 drivers/gpu/drm/i915/intel_uncore.c   |  70 ++--
 drivers/gpu/drm/i915/intel_uncore.h   |   7 +-
 16 files changed, 888 insertions(+), 468 deletions(-)

-- 
2.30.2



[Intel-gfx] [PATCH 01/24] drm/i915/uncore: split the fw get function into separate vfunc

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

constify it while here. drop the put function since it was never
overloaded and always has done the same thing, no point in
indirecting it for show.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/intel_uncore.c | 70 -
 drivers/gpu/drm/i915/intel_uncore.h |  7 +--
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_uncore.c 
b/drivers/gpu/drm/i915/intel_uncore.c
index f9767054dbdf..8652e4221404 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -36,6 +36,12 @@
 
 #define __raw_posting_read(...) ((void)__raw_uncore_read32(__VA_ARGS__))
 
+static void
+fw_domains_get(struct intel_uncore *uncore, enum forcewake_domains fw_domains)
+{
+   uncore->fw_get_funcs->force_wake_get(uncore, fw_domains);
+}
+
 void
 intel_uncore_mmio_debug_init_early(struct intel_uncore_mmio_debug *mmio_debug)
 {
@@ -248,7 +254,7 @@ fw_domain_put(const struct intel_uncore_forcewake_domain *d)
 }
 
 static void
-fw_domains_get(struct intel_uncore *uncore, enum forcewake_domains fw_domains)
+fw_domains_get_normal(struct intel_uncore *uncore, enum forcewake_domains 
fw_domains)
 {
struct intel_uncore_forcewake_domain *d;
unsigned int tmp;
@@ -396,7 +402,7 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
 
GEM_BUG_ON(!domain->wake_count);
if (--domain->wake_count == 0)
-   uncore->funcs.force_wake_put(uncore, domain->mask);
+   fw_domains_put(uncore, domain->mask);
 
spin_unlock_irqrestore(&uncore->lock, irqflags);
 
@@ -454,7 +460,7 @@ intel_uncore_forcewake_reset(struct intel_uncore *uncore)
 
fw = uncore->fw_domains_active;
if (fw)
-   uncore->funcs.force_wake_put(uncore, fw);
+   fw_domains_put(uncore, fw);
 
fw_domains_reset(uncore, uncore->fw_domains);
assert_forcewakes_inactive(uncore);
@@ -562,7 +568,7 @@ static void forcewake_early_sanitize(struct intel_uncore 
*uncore,
intel_uncore_forcewake_reset(uncore);
if (restore_forcewake) {
spin_lock_irq(&uncore->lock);
-   uncore->funcs.force_wake_get(uncore, restore_forcewake);
+   fw_domains_get(uncore, restore_forcewake);
 
if (intel_uncore_has_fifo(uncore))
uncore->fifo_count = fifo_free_entries(uncore);
@@ -623,7 +629,7 @@ static void __intel_uncore_forcewake_get(struct 
intel_uncore *uncore,
}
 
if (fw_domains)
-   uncore->funcs.force_wake_get(uncore, fw_domains);
+   fw_domains_get(uncore, fw_domains);
 }
 
 /**
@@ -644,7 +650,7 @@ void intel_uncore_forcewake_get(struct intel_uncore *uncore,
 {
unsigned long irqflags;
 
-   if (!uncore->funcs.force_wake_get)
+   if (!uncore->fw_get_funcs)
return;
 
assert_rpm_wakelock_held(uncore->rpm);
@@ -711,7 +717,7 @@ void intel_uncore_forcewake_get__locked(struct intel_uncore 
*uncore,
 {
lockdep_assert_held(&uncore->lock);
 
-   if (!uncore->funcs.force_wake_get)
+   if (!uncore->fw_get_funcs)
return;
 
__intel_uncore_forcewake_get(uncore, fw_domains);
@@ -733,7 +739,7 @@ static void __intel_uncore_forcewake_put(struct 
intel_uncore *uncore,
continue;
}
 
-   uncore->funcs.force_wake_put(uncore, domain->mask);
+   fw_domains_put(uncore, domain->mask);
}
 }
 
@@ -750,7 +756,7 @@ void intel_uncore_forcewake_put(struct intel_uncore *uncore,
 {
unsigned long irqflags;
 
-   if (!uncore->funcs.force_wake_put)
+   if (!uncore->fw_get_funcs)
return;
 
spin_lock_irqsave(&uncore->lock, irqflags);
@@ -769,7 +775,7 @@ void intel_uncore_forcewake_flush(struct intel_uncore 
*uncore,
struct intel_uncore_forcewake_domain *domain;
unsigned int tmp;
 
-   if (!uncore->funcs.force_wake_put)
+   if (!uncore->fw_get_funcs)
return;
 
fw_domains &= uncore->fw_domains;
@@ -793,7 +799,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore 
*uncore,
 {
lockdep_assert_held(&uncore->lock);
 
-   if (!uncore->funcs.force_wake_put)
+   if (!uncore->fw_get_funcs)
return;
 
__intel_uncore_forcewake_put(uncore, fw_domains);
@@ -801,7 +807,7 @@ void intel_uncore_forcewake_put__locked(struct intel_uncore 
*uncore,
 
 void assert_forcewakes_inactive(struct intel_uncore *uncore)
 {
-   if (!uncore->funcs.force_wake_get)
+   if (!uncore->fw_get_funcs)
return;
 
drm_WARN(&uncore->i915->drm, uncore->fw_domains_active,
@@ -818,7 +824,7 @@ void assert_forcewakes_active(struct intel_uncore *uncore,
if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM))
return;
 
-   if (!uncore->funcs.force_w

[Intel-gfx] [PATCH 02/24] drm/i915/pm: drop get_fifo_size vfunc.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

The i845_update_wm code was always calling the i845 variant,
and the i9xx_update_wm had only a choice between i830 and i9xx
paths, hardly worth the vfunc overhead.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/i915_drv.h |  2 --
 drivers/gpu/drm/i915/intel_pm.c | 20 +++-
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 37c1ca266bcd..b24c0d1dbd31 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -330,8 +330,6 @@ struct drm_i915_display_funcs {
  const struct intel_cdclk_config *cdclk_config,
  enum pipe pipe);
int (*bw_calc_min_cdclk)(struct intel_atomic_state *state);
-   int (*get_fifo_size)(struct drm_i915_private *dev_priv,
-enum i9xx_plane_id i9xx_plane);
int (*compute_pipe_wm)(struct intel_atomic_state *state,
   struct intel_crtc *crtc);
int (*compute_intermediate_wm)(struct intel_atomic_state *state,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index cfc41f8fa74a..d9993eb3730d 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2347,7 +2347,10 @@ static void i9xx_update_wm(struct intel_crtc 
*unused_crtc)
else
wm_info = &i830_a_wm_info;
 
-   fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_A);
+   if (DISPLAY_VER(dev_priv) == 2)
+   fifo_size = i830_get_fifo_size(dev_priv, PLANE_A);
+   else
+   fifo_size = i9xx_get_fifo_size(dev_priv, PLANE_A);
crtc = intel_get_crtc_for_plane(dev_priv, PLANE_A);
if (intel_crtc_active(crtc)) {
const struct drm_display_mode *pipe_mode =
@@ -2374,7 +2377,10 @@ static void i9xx_update_wm(struct intel_crtc 
*unused_crtc)
if (DISPLAY_VER(dev_priv) == 2)
wm_info = &i830_bc_wm_info;
 
-   fifo_size = dev_priv->display.get_fifo_size(dev_priv, PLANE_B);
+   if (DISPLAY_VER(dev_priv) == 2)
+   fifo_size = i830_get_fifo_size(dev_priv, PLANE_B);
+   else
+   fifo_size = i9xx_get_fifo_size(dev_priv, PLANE_B);
crtc = intel_get_crtc_for_plane(dev_priv, PLANE_B);
if (intel_crtc_active(crtc)) {
const struct drm_display_mode *pipe_mode =
@@ -2490,7 +2496,7 @@ static void i845_update_wm(struct intel_crtc *unused_crtc)
pipe_mode = &crtc->config->hw.pipe_mode;
planea_wm = intel_calculate_wm(pipe_mode->crtc_clock,
   &i845_wm_info,
-  
dev_priv->display.get_fifo_size(dev_priv, PLANE_A),
+  i845_get_fifo_size(dev_priv, PLANE_A),
   4, pessimal_latency_ns);
fwater_lo = intel_uncore_read(&dev_priv->uncore, FW_BLC) & ~0xfff;
fwater_lo |= (3<<8) | planea_wm;
@@ -8054,15 +8060,11 @@ void intel_init_pm(struct drm_i915_private *dev_priv)
dev_priv->display.update_wm = i965_update_wm;
} else if (DISPLAY_VER(dev_priv) == 3) {
dev_priv->display.update_wm = i9xx_update_wm;
-   dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
} else if (DISPLAY_VER(dev_priv) == 2) {
-   if (INTEL_NUM_PIPES(dev_priv) == 1) {
+   if (INTEL_NUM_PIPES(dev_priv) == 1)
dev_priv->display.update_wm = i845_update_wm;
-   dev_priv->display.get_fifo_size = i845_get_fifo_size;
-   } else {
+   else
dev_priv->display.update_wm = i9xx_update_wm;
-   dev_priv->display.get_fifo_size = i830_get_fifo_size;
-   }
} else {
drm_err(&dev_priv->drm,
"unexpected fall-through in %s\n", __func__);
-- 
2.30.2



[Intel-gfx] [PATCH 03/24] drm/i915: make update_wm take a dev_priv.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

The crtc was never being used here.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_display.c | 10 +-
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/intel_pm.c  | 20 +++-
 drivers/gpu/drm/i915/intel_pm.h  |  2 +-
 4 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index a7ca38613f89..109e213d8f75 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2374,7 +2374,7 @@ static void intel_post_plane_update(struct 
intel_atomic_state *state,
intel_frontbuffer_flip(dev_priv, new_crtc_state->fb_bits);
 
if (new_crtc_state->update_wm_post && new_crtc_state->hw.active)
-   intel_update_watermarks(crtc);
+   intel_update_watermarks(dev_priv);
 
if (hsw_post_update_enable_ips(old_crtc_state, new_crtc_state))
hsw_enable_ips(new_crtc_state);
@@ -2531,7 +2531,7 @@ static void intel_pre_plane_update(struct 
intel_atomic_state *state,
if (dev_priv->display.initial_watermarks)
dev_priv->display.initial_watermarks(state, crtc);
else if (new_crtc_state->update_wm_pre)
-   intel_update_watermarks(crtc);
+   intel_update_watermarks(dev_priv);
}
 
/*
@@ -3578,7 +3578,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state 
*state,
if (dev_priv->display.initial_watermarks)
dev_priv->display.initial_watermarks(state, crtc);
else
-   intel_update_watermarks(crtc);
+   intel_update_watermarks(dev_priv);
intel_enable_pipe(new_crtc_state);
 
intel_crtc_vblank_on(new_crtc_state);
@@ -3645,7 +3645,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state 
*state,
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 
if (!dev_priv->display.initial_watermarks)
-   intel_update_watermarks(crtc);
+   intel_update_watermarks(dev_priv);
 
/* clock the pipe down to 640x480@60 to potentially save power */
if (IS_I830(dev_priv))
@@ -3721,7 +3721,7 @@ static void intel_crtc_disable_noatomic(struct intel_crtc 
*crtc,
encoder->base.crtc = NULL;
 
intel_fbc_disable(crtc);
-   intel_update_watermarks(crtc);
+   intel_update_watermarks(dev_priv);
intel_disable_shared_dpll(crtc_state);
 
intel_display_power_put_all_in_set(dev_priv, 
&crtc->enabled_power_domains);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index b24c0d1dbd31..18e7a6a2d4a4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -341,7 +341,7 @@ struct drm_i915_display_funcs {
void (*optimize_watermarks)(struct intel_atomic_state *state,
struct intel_crtc *crtc);
int (*compute_global_watermarks)(struct intel_atomic_state *state);
-   void (*update_wm)(struct intel_crtc *crtc);
+   void (*update_wm)(struct drm_i915_private *dev_priv);
int (*modeset_calc_cdclk)(struct intel_cdclk_state *state);
u8 (*calc_voltage_level)(int cdclk);
/* Returns the active state of the crtc, and if the crtc is active,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index d9993eb3730d..be6520756aae 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -881,9 +881,8 @@ static struct intel_crtc *single_enabled_crtc(struct 
drm_i915_private *dev_priv)
return enabled;
 }
 
-static void pnv_update_wm(struct intel_crtc *unused_crtc)
+static void pnv_update_wm(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
struct intel_crtc *crtc;
const struct cxsr_latency *latency;
u32 reg;
@@ -2253,9 +2252,8 @@ static void vlv_optimize_watermarks(struct 
intel_atomic_state *state,
mutex_unlock(&dev_priv->wm.wm_mutex);
 }
 
-static void i965_update_wm(struct intel_crtc *unused_crtc)
+static void i965_update_wm(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
struct intel_crtc *crtc;
int srwm = 1;
int cursor_sr = 16;
@@ -2329,9 +2327,8 @@ static void i965_update_wm(struct intel_crtc *unused_crtc)
 
 #undef FW_WM
 
-static void i9xx_update_wm(struct intel_crtc *unused_crtc)
+static void i9xx_update_wm(struct drm_i915_private *dev_priv)
 {
-   struct drm_i915_private *dev_priv = to_i915(unused_crtc->base.dev);
const struct intel_watermark_params *wm_info;
u32 fwater_lo;
u32 fwater_hi;
@@ -2481,9 +2478,8 @@ 

[Intel-gfx] [PATCH 04/24] drm/i915/wm: provide wrappers around watermark vfuncs calls (v2)

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This moves one wrapper from the pm->display side, and creates
wrappers for all the others, this should simplify things later.

One thing to note is that the code checks the existence of some
of these ptrs, so the wrappers are a bit complicated by that.

Suggested by Jani.

v2: fixup warnings in wrong place error.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_display.c | 187 ---
 drivers/gpu/drm/i915/intel_pm.c  |  39 
 drivers/gpu/drm/i915/intel_pm.h  |   1 -
 3 files changed, 123 insertions(+), 104 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 109e213d8f75..6fbf2d99d096 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -126,6 +126,101 @@ static void ilk_pfit_enable(const struct intel_crtc_state 
*crtc_state);
 static void intel_modeset_setup_hw_state(struct drm_device *dev,
 struct drm_modeset_acquire_ctx *ctx);
 
+
+/**
+ * intel_update_watermarks - update FIFO watermark values based on current 
modes
+ * @dev_priv: i915 device
+ *
+ * Calculate watermark values for the various WM regs based on current mode
+ * and plane configuration.
+ *
+ * There are several cases to deal with here:
+ *   - normal (i.e. non-self-refresh)
+ *   - self-refresh (SR) mode
+ *   - lines are large relative to FIFO size (buffer can hold up to 2)
+ *   - lines are small relative to FIFO size (buffer can hold more than 2
+ * lines), so need to account for TLB latency
+ *
+ *   The normal calculation is:
+ * watermark = dotclock * bytes per pixel * latency
+ *   where latency is platform & configuration dependent (we assume pessimal
+ *   values here).
+ *
+ *   The SR calculation is:
+ * watermark = (trunc(latency/line time)+1) * surface width *
+ *   bytes per pixel
+ *   where
+ * line time = htotal / dotclock
+ * surface width = hdisplay for normal plane and 64 for cursor
+ *   and latency is assumed to be high, as above.
+ *
+ * The final value programmed to the register should always be rounded up,
+ * and include an extra 2 entries to account for clock crossings.
+ *
+ * We don't use the sprite, so we can ignore that.  And on Crestline we have
+ * to set the non-SR watermarks to 8.
+ */
+static void intel_update_watermarks(struct drm_i915_private *dev_priv)
+{
+   if (dev_priv->display.update_wm)
+   dev_priv->display.update_wm(dev_priv);
+}
+
+static int intel_compute_pipe_wm(struct intel_atomic_state *state,
+struct intel_crtc *crtc)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (dev_priv->display.compute_pipe_wm)
+   return dev_priv->display.compute_pipe_wm(state, crtc);
+   return 0;
+}
+
+static int intel_compute_intermediate_wm(struct intel_atomic_state *state,
+struct intel_crtc *crtc)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (!dev_priv->display.compute_intermediate_wm)
+   return 0;
+   if (drm_WARN_ON(&dev_priv->drm,
+   !dev_priv->display.compute_pipe_wm))
+   return 0;
+   return dev_priv->display.compute_intermediate_wm(state, crtc);
+}
+
+static bool intel_initial_watermarks(struct intel_atomic_state *state,
+struct intel_crtc *crtc)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (dev_priv->display.initial_watermarks) {
+   dev_priv->display.initial_watermarks(state, crtc);
+   return true;
+   }
+   return false;
+}
+
+static void intel_atomic_update_watermarks(struct intel_atomic_state *state,
+  struct intel_crtc *crtc)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (dev_priv->display.atomic_update_watermarks)
+   dev_priv->display.atomic_update_watermarks(state, crtc);
+}
+
+static void intel_optimize_watermarks(struct intel_atomic_state *state,
+ struct intel_crtc *crtc)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (dev_priv->display.optimize_watermarks)
+   dev_priv->display.optimize_watermarks(state, crtc);
+}
+
+static void intel_compute_global_watermarks(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   if (dev_priv->display.compute_global_watermarks)
+   dev_priv->display.compute_global_watermarks(state);
+}
+
 /* returns HPLL frequency in kHz */
 int vlv_get_hpll_vco(struct drm_i915_private *dev_priv)
 {
@@ -2528,9 +2623,8 @@ static void intel_pre_plane_update(struct 
intel

[Intel-gfx] [PATCH 06/24] drm/i915/display: add intel_fdi_link_train wrapper.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This wraps the fdi link training vfunc to make it clearer.

Suggested by Jani.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_display.c | 2 +-
 drivers/gpu/drm/i915/display/intel_fdi.c | 8 
 drivers/gpu/drm/i915/display/intel_fdi.h | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index d2df74dc777e..8c8c1c7cdd86 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -2156,7 +2156,7 @@ static void ilk_pch_enable(const struct 
intel_atomic_state *state,
assert_pch_transcoder_disabled(dev_priv, pipe);
 
/* For PCH output, training FDI link */
-   dev_priv->display.fdi_link_train(crtc, crtc_state);
+   intel_fdi_link_train(crtc, crtc_state);
 
/* We need to program the right clock selection before writing the pixel
 * mutliplier into the DPLL. */
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c 
b/drivers/gpu/drm/i915/display/intel_fdi.c
index fc09b781f15f..339243399a65 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -10,6 +10,14 @@
 #include "intel_fdi.h"
 #include "intel_sideband.h"
 
+void intel_fdi_link_train(struct intel_crtc *crtc,
+ const struct intel_crtc_state *crtc_state)
+{
+   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+
+   dev_priv->display.fdi_link_train(crtc, crtc_state);
+}
+
 /* units of 100MHz */
 static int pipe_required_fdi_lanes(struct intel_crtc_state *crtc_state)
 {
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.h 
b/drivers/gpu/drm/i915/display/intel_fdi.h
index 60acf2133145..61cb216a09f5 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.h
+++ b/drivers/gpu/drm/i915/display/intel_fdi.h
@@ -26,4 +26,6 @@ void hsw_fdi_link_train(struct intel_encoder *encoder,
 void intel_fdi_pll_freq_update(struct drm_i915_private *i915);
 void lpt_fdi_program_mphy(struct drm_i915_private *i915);
 
+void intel_fdi_link_train(struct intel_crtc *crtc,
+ const struct intel_crtc_state *crtc_state);
 #endif
-- 
2.30.2



[Intel-gfx] [PATCH 05/24] drm/i915: add wrappers around cdclk vtable funcs.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This adds wrappers around all the vtable callers so they are in
one place.

Suggested by Jani.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c| 47 +++
 drivers/gpu/drm/i915/display/intel_cdclk.h|  4 +-
 drivers/gpu/drm/i915/display/intel_display.c  |  2 +-
 .../drm/i915/display/intel_display_power.c|  2 +-
 4 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 9aec17b33819..0e09f259914f 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -59,6 +59,37 @@
  * dividers can be programmed correctly.
  */
 
+void intel_cdclk_get_cdclk(struct drm_i915_private *dev_priv,
+  struct intel_cdclk_config *cdclk_config)
+{
+   dev_priv->display.get_cdclk(dev_priv, cdclk_config);
+}
+
+int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state)
+{
+   struct drm_i915_private *dev_priv = to_i915(state->base.dev);
+   return dev_priv->display.bw_calc_min_cdclk(state);
+}
+
+static void intel_cdclk_set_cdclk(struct drm_i915_private *dev_priv,
+ const struct intel_cdclk_config *cdclk_config,
+ enum pipe pipe)
+{
+   dev_priv->display.set_cdclk(dev_priv, cdclk_config, pipe);
+}
+
+static int intel_cdclk_modeset_calc_cdclk(struct drm_i915_private *dev_priv,
+ struct intel_cdclk_state 
*cdclk_config)
+{
+   return dev_priv->display.modeset_calc_cdclk(cdclk_config);
+}
+
+static u8 intel_cdclk_calc_voltage_level(struct drm_i915_private *dev_priv,
+int cdclk)
+{
+   return dev_priv->display.calc_voltage_level(cdclk);
+}
+
 static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv,
   struct intel_cdclk_config *cdclk_config)
 {
@@ -1466,7 +1497,7 @@ static void bxt_get_cdclk(struct drm_i915_private 
*dev_priv,
 * at least what the CDCLK frequency requires.
 */
cdclk_config->voltage_level =
-   dev_priv->display.calc_voltage_level(cdclk_config->cdclk);
+   intel_cdclk_calc_voltage_level(dev_priv, cdclk_config->cdclk);
 }
 
 static void bxt_de_pll_disable(struct drm_i915_private *dev_priv)
@@ -1777,7 +1808,7 @@ static void bxt_cdclk_init_hw(struct drm_i915_private 
*dev_priv)
cdclk_config.cdclk = bxt_calc_cdclk(dev_priv, 0);
cdclk_config.vco = bxt_calc_cdclk_pll_vco(dev_priv, cdclk_config.cdclk);
cdclk_config.voltage_level =
-   dev_priv->display.calc_voltage_level(cdclk_config.cdclk);
+   intel_cdclk_calc_voltage_level(dev_priv, cdclk_config.cdclk);
 
bxt_set_cdclk(dev_priv, &cdclk_config, INVALID_PIPE);
 }
@@ -1789,7 +1820,7 @@ static void bxt_cdclk_uninit_hw(struct drm_i915_private 
*dev_priv)
cdclk_config.cdclk = cdclk_config.bypass;
cdclk_config.vco = 0;
cdclk_config.voltage_level =
-   dev_priv->display.calc_voltage_level(cdclk_config.cdclk);
+   intel_cdclk_calc_voltage_level(dev_priv, cdclk_config.cdclk);
 
bxt_set_cdclk(dev_priv, &cdclk_config, INVALID_PIPE);
 }
@@ -1956,7 +1987,7 @@ static void intel_set_cdclk(struct drm_i915_private 
*dev_priv,
 &dev_priv->gmbus_mutex);
}
 
-   dev_priv->display.set_cdclk(dev_priv, cdclk_config, pipe);
+   intel_cdclk_set_cdclk(dev_priv, cdclk_config, pipe);
 
for_each_intel_dp(&dev_priv->drm, encoder) {
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
@@ -2424,7 +2455,7 @@ static int bxt_modeset_calc_cdclk(struct 
intel_cdclk_state *cdclk_state)
cdclk_state->logical.cdclk = cdclk;
cdclk_state->logical.voltage_level =
max_t(int, min_voltage_level,
- dev_priv->display.calc_voltage_level(cdclk));
+ intel_cdclk_calc_voltage_level(dev_priv, cdclk));
 
if (!cdclk_state->active_pipes) {
cdclk = bxt_calc_cdclk(dev_priv, cdclk_state->force_min_cdclk);
@@ -2433,7 +2464,7 @@ static int bxt_modeset_calc_cdclk(struct 
intel_cdclk_state *cdclk_state)
cdclk_state->actual.vco = vco;
cdclk_state->actual.cdclk = cdclk;
cdclk_state->actual.voltage_level =
-   dev_priv->display.calc_voltage_level(cdclk);
+   intel_cdclk_calc_voltage_level(dev_priv, cdclk);
} else {
cdclk_state->actual = cdclk_state->logical;
}
@@ -2525,7 +2556,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state 
*state)
new_cdclk_state->active_pipes =
intel_calc_active_pipes(state, old_cdclk_state->active_pipes);
 
-   ret = dev_pr

[Intel-gfx] [PATCH 07/24] drm/i915: split clock gating init from display vtable

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This function is only used inside intel_pm.c

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/i915_drv.h |  9 ++-
 drivers/gpu/drm/i915/intel_pm.c | 48 -
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 18e7a6a2d4a4..20a415579707 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -323,6 +323,11 @@ struct intel_crtc;
 struct intel_limit;
 struct dpll;
 
+/* functions used internal in intel_pm.c */
+struct drm_i915_clock_gating_funcs {
+   void (*init_clock_gating)(struct drm_i915_private *dev_priv);
+};
+
 struct drm_i915_display_funcs {
void (*get_cdclk)(struct drm_i915_private *dev_priv,
  struct intel_cdclk_config *cdclk_config);
@@ -365,7 +370,6 @@ struct drm_i915_display_funcs {
const struct drm_connector_state 
*old_conn_state);
void (*fdi_link_train)(struct intel_crtc *crtc,
   const struct intel_crtc_state *crtc_state);
-   void (*init_clock_gating)(struct drm_i915_private *dev_priv);
void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
/* clock updates for mode set */
/* cursor updates */
@@ -954,6 +958,9 @@ struct drm_i915_private {
/* unbound hipri wq for page flips/plane updates */
struct workqueue_struct *flip_wq;
 
+   /* pm private clock gating functions */
+   struct drm_i915_clock_gating_funcs clock_gating_funcs;
+
/* Display functions */
struct drm_i915_display_funcs display;
 
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 4054c6f7a2f9..add50ff01d7c 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7871,7 +7871,7 @@ static void i830_init_clock_gating(struct 
drm_i915_private *dev_priv)
 
 void intel_init_clock_gating(struct drm_i915_private *dev_priv)
 {
-   dev_priv->display.init_clock_gating(dev_priv);
+   dev_priv->clock_gating_funcs.init_clock_gating(dev_priv);
 }
 
 void intel_suspend_hw(struct drm_i915_private *dev_priv)
@@ -7898,52 +7898,52 @@ static void nop_init_clock_gating(struct 
drm_i915_private *dev_priv)
 void intel_init_clock_gating_hooks(struct drm_i915_private *dev_priv)
 {
if (IS_ALDERLAKE_P(dev_priv))
-   dev_priv->display.init_clock_gating = adlp_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
adlp_init_clock_gating;
else if (IS_DG1(dev_priv))
-   dev_priv->display.init_clock_gating = dg1_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
dg1_init_clock_gating;
else if (GRAPHICS_VER(dev_priv) == 12)
-   dev_priv->display.init_clock_gating = gen12lp_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
gen12lp_init_clock_gating;
else if (GRAPHICS_VER(dev_priv) == 11)
-   dev_priv->display.init_clock_gating = icl_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
icl_init_clock_gating;
else if (IS_COFFEELAKE(dev_priv) || IS_COMETLAKE(dev_priv))
-   dev_priv->display.init_clock_gating = cfl_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
cfl_init_clock_gating;
else if (IS_SKYLAKE(dev_priv))
-   dev_priv->display.init_clock_gating = skl_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
skl_init_clock_gating;
else if (IS_KABYLAKE(dev_priv))
-   dev_priv->display.init_clock_gating = kbl_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
kbl_init_clock_gating;
else if (IS_BROXTON(dev_priv))
-   dev_priv->display.init_clock_gating = bxt_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
bxt_init_clock_gating;
else if (IS_GEMINILAKE(dev_priv))
-   dev_priv->display.init_clock_gating = glk_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
glk_init_clock_gating;
else if (IS_BROADWELL(dev_priv))
-   dev_priv->display.init_clock_gating = bdw_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
bdw_init_clock_gating;
else if (IS_CHERRYVIEW(dev_priv))
-   dev_priv->display.init_clock_gating = chv_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
chv_init_clock_gating;
else if (IS_HASWELL(dev_priv))
-   dev_priv->display.init_clock_gating = hsw_init_clock_gating;
+   dev_priv->clock_gating_funcs.init_clock_gating = 
hsw_init_clock_gating;
else if (IS_IVYBRIDGE(dev

[Intel-gfx] [PATCH 08/24] drm/i915: split watermark vfuncs from display vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

These are the watermark api between display and pm.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_display.c | 35 -
 drivers/gpu/drm/i915/i915_drv.h  | 24 
 drivers/gpu/drm/i915/intel_pm.c  | 40 ++--
 3 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 8c8c1c7cdd86..6ed32100924c 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -162,16 +162,16 @@ static void intel_modeset_setup_hw_state(struct 
drm_device *dev,
  */
 static void intel_update_watermarks(struct drm_i915_private *dev_priv)
 {
-   if (dev_priv->display.update_wm)
-   dev_priv->display.update_wm(dev_priv);
+   if (dev_priv->wm_disp.update_wm)
+   dev_priv->wm_disp.update_wm(dev_priv);
 }
 
 static int intel_compute_pipe_wm(struct intel_atomic_state *state,
 struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (dev_priv->display.compute_pipe_wm)
-   return dev_priv->display.compute_pipe_wm(state, crtc);
+   if (dev_priv->wm_disp.compute_pipe_wm)
+   return dev_priv->wm_disp.compute_pipe_wm(state, crtc);
return 0;
 }
 
@@ -179,20 +179,20 @@ static int intel_compute_intermediate_wm(struct 
intel_atomic_state *state,
 struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (!dev_priv->display.compute_intermediate_wm)
+   if (!dev_priv->wm_disp.compute_intermediate_wm)
return 0;
if (drm_WARN_ON(&dev_priv->drm,
-   !dev_priv->display.compute_pipe_wm))
+   !dev_priv->wm_disp.compute_pipe_wm))
return 0;
-   return dev_priv->display.compute_intermediate_wm(state, crtc);
+   return dev_priv->wm_disp.compute_intermediate_wm(state, crtc);
 }
 
 static bool intel_initial_watermarks(struct intel_atomic_state *state,
 struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (dev_priv->display.initial_watermarks) {
-   dev_priv->display.initial_watermarks(state, crtc);
+   if (dev_priv->wm_disp.initial_watermarks) {
+   dev_priv->wm_disp.initial_watermarks(state, crtc);
return true;
}
return false;
@@ -202,23 +202,23 @@ static void intel_atomic_update_watermarks(struct 
intel_atomic_state *state,
   struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (dev_priv->display.atomic_update_watermarks)
-   dev_priv->display.atomic_update_watermarks(state, crtc);
+   if (dev_priv->wm_disp.atomic_update_watermarks)
+   dev_priv->wm_disp.atomic_update_watermarks(state, crtc);
 }
 
 static void intel_optimize_watermarks(struct intel_atomic_state *state,
  struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (dev_priv->display.optimize_watermarks)
-   dev_priv->display.optimize_watermarks(state, crtc);
+   if (dev_priv->wm_disp.optimize_watermarks)
+   dev_priv->wm_disp.optimize_watermarks(state, crtc);
 }
 
 static void intel_compute_global_watermarks(struct intel_atomic_state *state)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   if (dev_priv->display.compute_global_watermarks)
-   dev_priv->display.compute_global_watermarks(state);
+   if (dev_priv->wm_disp.compute_global_watermarks)
+   dev_priv->wm_disp.compute_global_watermarks(state);
 }
 
 /* returns HPLL frequency in kHz */
@@ -3669,6 +3669,7 @@ static void i9xx_crtc_enable(struct intel_atomic_state 
*state,
 
if (!intel_initial_watermarks(state, crtc))
intel_update_watermarks(dev_priv);
+
intel_enable_pipe(new_crtc_state);
 
intel_crtc_vblank_on(new_crtc_state);
@@ -3734,7 +3735,7 @@ static void i9xx_crtc_disable(struct intel_atomic_state 
*state,
if (DISPLAY_VER(dev_priv) != 2)
intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, false);
 
-   if (!dev_priv->display.initial_watermarks)
+   if (!dev_priv->wm_disp.initial_watermarks)
intel_update_watermarks(dev_priv);
 
/* clock the pipe down to 640x480@60 to potentially save power */
@@ -11387,7 +11388,7 @@ static void sanitize_watermarks(struct drm_i915_private 
*dev_priv)
int i;
 
/* Only supported on platforms that use atomic watermark desig

[Intel-gfx] [PATCH 09/24] drm/i915: split color functions from display vtable

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

These are only used internally in the color module

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_color.c | 64 +++---
 drivers/gpu/drm/i915/i915_drv.h| 39 +++--
 2 files changed, 54 insertions(+), 49 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_color.c 
b/drivers/gpu/drm/i915/display/intel_color.c
index afcb4bf3826c..ed79075158dd 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1137,14 +1137,14 @@ void intel_color_load_luts(const struct 
intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   dev_priv->display.load_luts(crtc_state);
+   dev_priv->color_funcs.load_luts(crtc_state);
 }
 
 void intel_color_commit(const struct intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   dev_priv->display.color_commit(crtc_state);
+   dev_priv->color_funcs.color_commit(crtc_state);
 }
 
 static bool intel_can_preload_luts(const struct intel_crtc_state 
*new_crtc_state)
@@ -1200,15 +1200,15 @@ int intel_color_check(struct intel_crtc_state 
*crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   return dev_priv->display.color_check(crtc_state);
+   return dev_priv->color_funcs.color_check(crtc_state);
 }
 
 void intel_color_get_config(struct intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   if (dev_priv->display.read_luts)
-   dev_priv->display.read_luts(crtc_state);
+   if (dev_priv->color_funcs.read_luts)
+   dev_priv->color_funcs.read_luts(crtc_state);
 }
 
 static bool need_plane_update(struct intel_plane *plane,
@@ -2101,51 +2101,51 @@ void intel_color_init(struct intel_crtc *crtc)
 
if (HAS_GMCH(dev_priv)) {
if (IS_CHERRYVIEW(dev_priv)) {
-   dev_priv->display.color_check = chv_color_check;
-   dev_priv->display.color_commit = i9xx_color_commit;
-   dev_priv->display.load_luts = chv_load_luts;
-   dev_priv->display.read_luts = chv_read_luts;
+   dev_priv->color_funcs.color_check = chv_color_check;
+   dev_priv->color_funcs.color_commit = i9xx_color_commit;
+   dev_priv->color_funcs.load_luts = chv_load_luts;
+   dev_priv->color_funcs.read_luts = chv_read_luts;
} else if (DISPLAY_VER(dev_priv) >= 4) {
-   dev_priv->display.color_check = i9xx_color_check;
-   dev_priv->display.color_commit = i9xx_color_commit;
-   dev_priv->display.load_luts = i965_load_luts;
-   dev_priv->display.read_luts = i965_read_luts;
+   dev_priv->color_funcs.color_check = i9xx_color_check;
+   dev_priv->color_funcs.color_commit = i9xx_color_commit;
+   dev_priv->color_funcs.load_luts = i965_load_luts;
+   dev_priv->color_funcs.read_luts = i965_read_luts;
} else {
-   dev_priv->display.color_check = i9xx_color_check;
-   dev_priv->display.color_commit = i9xx_color_commit;
-   dev_priv->display.load_luts = i9xx_load_luts;
-   dev_priv->display.read_luts = i9xx_read_luts;
+   dev_priv->color_funcs.color_check = i9xx_color_check;
+   dev_priv->color_funcs.color_commit = i9xx_color_commit;
+   dev_priv->color_funcs.load_luts = i9xx_load_luts;
+   dev_priv->color_funcs.read_luts = i9xx_read_luts;
}
} else {
if (DISPLAY_VER(dev_priv) >= 11)
-   dev_priv->display.color_check = icl_color_check;
+   dev_priv->color_funcs.color_check = icl_color_check;
else if (DISPLAY_VER(dev_priv) >= 10)
-   dev_priv->display.color_check = glk_color_check;
+   dev_priv->color_funcs.color_check = glk_color_check;
else if (DISPLAY_VER(dev_priv) >= 7)
-   dev_priv->display.color_check = ivb_color_check;
+   dev_priv->color_funcs.color_check = ivb_color_check;
else
-   dev_priv->display.color_check = ilk_color_check;
+   dev_priv->color_funcs.color_check = ilk_color_check;
 
if (DISPLAY_VER(dev_priv) >= 9)
-   dev_priv->display.color_commit = skl_color_commit;
+   dev_priv->color_funcs.color_commit = skl_color_commit;
else if (I

[Intel-gfx] [PATCH 10/24] drm/i915: split audio functions from display vtable

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

These are only used internally in the audio code

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_audio.c | 24 +++---
 drivers/gpu/drm/i915/i915_drv.h| 19 +++--
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_audio.c 
b/drivers/gpu/drm/i915/display/intel_audio.c
index 532237588511..f539826c0424 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -848,8 +848,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 
connector->eld[6] = drm_av_sync_delay(connector, adjusted_mode) / 2;
 
-   if (dev_priv->display.audio_codec_enable)
-   dev_priv->display.audio_codec_enable(encoder,
+   if (dev_priv->audio_funcs.audio_codec_enable)
+   dev_priv->audio_funcs.audio_codec_enable(encoder,
 crtc_state,
 conn_state);
 
@@ -893,8 +893,8 @@ void intel_audio_codec_disable(struct intel_encoder 
*encoder,
enum port port = encoder->port;
enum pipe pipe = crtc->pipe;
 
-   if (dev_priv->display.audio_codec_disable)
-   dev_priv->display.audio_codec_disable(encoder,
+   if (dev_priv->audio_funcs.audio_codec_disable)
+   dev_priv->audio_funcs.audio_codec_disable(encoder,
  old_crtc_state,
  old_conn_state);
 
@@ -922,17 +922,17 @@ void intel_audio_codec_disable(struct intel_encoder 
*encoder,
 void intel_init_audio_hooks(struct drm_i915_private *dev_priv)
 {
if (IS_G4X(dev_priv)) {
-   dev_priv->display.audio_codec_enable = g4x_audio_codec_enable;
-   dev_priv->display.audio_codec_disable = g4x_audio_codec_disable;
+   dev_priv->audio_funcs.audio_codec_enable = 
g4x_audio_codec_enable;
+   dev_priv->audio_funcs.audio_codec_disable = 
g4x_audio_codec_disable;
} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
-   dev_priv->display.audio_codec_enable = ilk_audio_codec_enable;
-   dev_priv->display.audio_codec_disable = ilk_audio_codec_disable;
+   dev_priv->audio_funcs.audio_codec_enable = 
ilk_audio_codec_enable;
+   dev_priv->audio_funcs.audio_codec_disable = 
ilk_audio_codec_disable;
} else if (IS_HASWELL(dev_priv) || DISPLAY_VER(dev_priv) >= 8) {
-   dev_priv->display.audio_codec_enable = hsw_audio_codec_enable;
-   dev_priv->display.audio_codec_disable = hsw_audio_codec_disable;
+   dev_priv->audio_funcs.audio_codec_enable = 
hsw_audio_codec_enable;
+   dev_priv->audio_funcs.audio_codec_disable = 
hsw_audio_codec_disable;
} else if (HAS_PCH_SPLIT(dev_priv)) {
-   dev_priv->display.audio_codec_enable = ilk_audio_codec_enable;
-   dev_priv->display.audio_codec_disable = ilk_audio_codec_disable;
+   dev_priv->audio_funcs.audio_codec_enable = 
ilk_audio_codec_enable;
+   dev_priv->audio_funcs.audio_codec_disable = 
ilk_audio_codec_disable;
}
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 50199ae71bda..8c6ede06d5ec 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -364,6 +364,15 @@ struct intel_color_funcs {
void (*read_luts)(struct intel_crtc_state *crtc_state);
 };
 
+struct intel_audio_funcs {
+   void (*audio_codec_enable)(struct intel_encoder *encoder,
+  const struct intel_crtc_state *crtc_state,
+  const struct drm_connector_state 
*conn_state);
+   void (*audio_codec_disable)(struct intel_encoder *encoder,
+   const struct intel_crtc_state 
*old_crtc_state,
+   const struct drm_connector_state 
*old_conn_state);
+};
+
 struct drm_i915_display_funcs {
void (*get_cdclk)(struct drm_i915_private *dev_priv,
  struct intel_cdclk_config *cdclk_config);
@@ -386,12 +395,7 @@ struct drm_i915_display_funcs {
 struct intel_crtc *crtc);
void (*commit_modeset_enables)(struct intel_atomic_state *state);
void (*commit_modeset_disables)(struct intel_atomic_state *state);
-   void (*audio_codec_enable)(struct intel_encoder *encoder,
-  const struct intel_crtc_state *crtc_state,
-  const struct drm_connector_state 
*conn_state);
-   void (*audio_codec_disable)(struct intel_encoder *encoder,
-   const struct intel_crtc_state 
*old_crtc_state,
- 

[Intel-gfx] [PATCH 11/24] drm/i915: split cdclk functions from display vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This moves all the cdclk related functions into their own vtable.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_cdclk.c | 142 ++---
 drivers/gpu/drm/i915/i915_drv.h|   8 +-
 2 files changed, 78 insertions(+), 72 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c 
b/drivers/gpu/drm/i915/display/intel_cdclk.c
index 0e09f259914f..27a4a226aa49 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -62,32 +62,32 @@
 void intel_cdclk_get_cdclk(struct drm_i915_private *dev_priv,
   struct intel_cdclk_config *cdclk_config)
 {
-   dev_priv->display.get_cdclk(dev_priv, cdclk_config);
+   dev_priv->cdclk_funcs.get_cdclk(dev_priv, cdclk_config);
 }
 
 int intel_cdclk_bw_calc_min_cdclk(struct intel_atomic_state *state)
 {
struct drm_i915_private *dev_priv = to_i915(state->base.dev);
-   return dev_priv->display.bw_calc_min_cdclk(state);
+   return dev_priv->cdclk_funcs.bw_calc_min_cdclk(state);
 }
 
 static void intel_cdclk_set_cdclk(struct drm_i915_private *dev_priv,
  const struct intel_cdclk_config *cdclk_config,
  enum pipe pipe)
 {
-   dev_priv->display.set_cdclk(dev_priv, cdclk_config, pipe);
+   dev_priv->cdclk_funcs.set_cdclk(dev_priv, cdclk_config, pipe);
 }
 
 static int intel_cdclk_modeset_calc_cdclk(struct drm_i915_private *dev_priv,
  struct intel_cdclk_state 
*cdclk_config)
 {
-   return dev_priv->display.modeset_calc_cdclk(cdclk_config);
+   return dev_priv->cdclk_funcs.modeset_calc_cdclk(cdclk_config);
 }
 
 static u8 intel_cdclk_calc_voltage_level(struct drm_i915_private *dev_priv,
 int cdclk)
 {
-   return dev_priv->display.calc_voltage_level(cdclk);
+   return dev_priv->cdclk_funcs.calc_voltage_level(cdclk);
 }
 
 static void fixed_133mhz_get_cdclk(struct drm_i915_private *dev_priv,
@@ -1963,7 +1963,7 @@ static void intel_set_cdclk(struct drm_i915_private 
*dev_priv,
if (!intel_cdclk_changed(&dev_priv->cdclk.hw, cdclk_config))
return;
 
-   if (drm_WARN_ON_ONCE(&dev_priv->drm, !dev_priv->display.set_cdclk))
+   if (drm_WARN_ON_ONCE(&dev_priv->drm, !dev_priv->cdclk_funcs.set_cdclk))
return;
 
intel_dump_cdclk_config(cdclk_config, "Changing CDCLK to");
@@ -2893,119 +2893,119 @@ u32 intel_read_rawclk(struct drm_i915_private 
*dev_priv)
 void intel_init_cdclk_hooks(struct drm_i915_private *dev_priv)
 {
if (IS_DG2(dev_priv)) {
-   dev_priv->display.set_cdclk = bxt_set_cdclk;
-   dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
-   dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
-   dev_priv->display.calc_voltage_level = tgl_calc_voltage_level;
+   dev_priv->cdclk_funcs.set_cdclk = bxt_set_cdclk;
+   dev_priv->cdclk_funcs.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
+   dev_priv->cdclk_funcs.modeset_calc_cdclk = 
bxt_modeset_calc_cdclk;
+   dev_priv->cdclk_funcs.calc_voltage_level = 
tgl_calc_voltage_level;
dev_priv->cdclk.table = dg2_cdclk_table;
} else if (IS_ALDERLAKE_P(dev_priv)) {
-   dev_priv->display.set_cdclk = bxt_set_cdclk;
-   dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
-   dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
-   dev_priv->display.calc_voltage_level = tgl_calc_voltage_level;
+   dev_priv->cdclk_funcs.set_cdclk = bxt_set_cdclk;
+   dev_priv->cdclk_funcs.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
+   dev_priv->cdclk_funcs.modeset_calc_cdclk = 
bxt_modeset_calc_cdclk;
+   dev_priv->cdclk_funcs.calc_voltage_level = 
tgl_calc_voltage_level;
/* Wa_22011320316:adl-p[a0] */
if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_B0))
dev_priv->cdclk.table = adlp_a_step_cdclk_table;
else
dev_priv->cdclk.table = adlp_cdclk_table;
} else if (IS_ROCKETLAKE(dev_priv)) {
-   dev_priv->display.set_cdclk = bxt_set_cdclk;
-   dev_priv->display.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
-   dev_priv->display.modeset_calc_cdclk = bxt_modeset_calc_cdclk;
-   dev_priv->display.calc_voltage_level = tgl_calc_voltage_level;
+   dev_priv->cdclk_funcs.set_cdclk = bxt_set_cdclk;
+   dev_priv->cdclk_funcs.bw_calc_min_cdclk = skl_bw_calc_min_cdclk;
+   dev_priv->cdclk_funcs.modeset_calc_cdclk = 
bxt_modeset_calc_cdclk;
+   dev_priv->cdclk_funcs.calc_voltage_level = 
tgl_calc_voltage_leve

[Intel-gfx] [PATCH 12/24] drm/i915: split irq hotplug function from display vtable

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This provides a service from irq to display, so make it separate

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c |  4 ++--
 drivers/gpu/drm/i915/i915_drv.h  |  9 -
 drivers/gpu/drm/i915/i915_irq.c  | 14 +++---
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 47c85ac97c87..05f76aba4f8a 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -215,8 +215,8 @@ intel_hpd_irq_storm_switch_to_polling(struct 
drm_i915_private *dev_priv)
 
 static void intel_hpd_irq_setup(struct drm_i915_private *i915)
 {
-   if (i915->display_irqs_enabled && i915->display.hpd_irq_setup)
-   i915->display.hpd_irq_setup(i915);
+   if (i915->display_irqs_enabled && i915->hotplug_funcs.hpd_irq_setup)
+   i915->hotplug_funcs.hpd_irq_setup(i915);
 }
 
 static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 42a925f701bd..0f549187b2f3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -384,6 +384,10 @@ struct intel_cdclk_funcs {
u8 (*calc_voltage_level)(int cdclk);
 };
 
+struct intel_hotplug_funcs {
+   void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
+};
+
 struct drm_i915_display_funcs {
/* Returns the active state of the crtc, and if the crtc is active,
 * fills out the pipe-config with the hw state. */
@@ -401,7 +405,7 @@ struct drm_i915_display_funcs {
 
void (*fdi_link_train)(struct intel_crtc *crtc,
   const struct intel_crtc_state *crtc_state);
-   void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
+
/* clock updates for mode set */
/* cursor updates */
/* render clock increase/decrease */
@@ -978,6 +982,9 @@ struct drm_i915_private {
/* pm display functions */
struct drm_i915_wm_disp_funcs wm_disp;
 
+   /* irq display functions */
+   struct intel_hotplug_funcs hotplug_funcs;
+
/* Display functions */
struct drm_i915_display_funcs display;
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 0a1681384c84..c35065f8f429 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4395,20 +4395,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
if (HAS_GMCH(dev_priv)) {
if (I915_HAS_HOTPLUG(dev_priv))
-   dev_priv->display.hpd_irq_setup = i915_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
i915_hpd_irq_setup;
} else {
if (HAS_PCH_DG1(dev_priv))
-   dev_priv->display.hpd_irq_setup = dg1_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
dg1_hpd_irq_setup;
else if (DISPLAY_VER(dev_priv) >= 11)
-   dev_priv->display.hpd_irq_setup = gen11_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
gen11_hpd_irq_setup;
else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
-   dev_priv->display.hpd_irq_setup = bxt_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
bxt_hpd_irq_setup;
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
-   dev_priv->display.hpd_irq_setup = icp_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
icp_hpd_irq_setup;
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
-   dev_priv->display.hpd_irq_setup = spt_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
spt_hpd_irq_setup;
else
-   dev_priv->display.hpd_irq_setup = ilk_hpd_irq_setup;
+   dev_priv->hotplug_funcs.hpd_irq_setup = 
ilk_hpd_irq_setup;
}
 }
 
-- 
2.30.2



[Intel-gfx] [PATCH 13/24] drm/i915: split fdi link training from display vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

It may make sense to merge this with display again later,
however the fdi use of the vtable is limited to only a
few generations.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_fdi.c |  8 
 drivers/gpu/drm/i915/i915_drv.h  | 11 ---
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c 
b/drivers/gpu/drm/i915/display/intel_fdi.c
index 339243399a65..94bb7e039fe7 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -15,7 +15,7 @@ void intel_fdi_link_train(struct intel_crtc *crtc,
 {
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-   dev_priv->display.fdi_link_train(crtc, crtc_state);
+   dev_priv->fdi_funcs.fdi_link_train(crtc, crtc_state);
 }
 
 /* units of 100MHz */
@@ -1017,11 +1017,11 @@ void
 intel_fdi_init_hook(struct drm_i915_private *dev_priv)
 {
if (IS_IRONLAKE(dev_priv)) {
-   dev_priv->display.fdi_link_train = ilk_fdi_link_train;
+   dev_priv->fdi_funcs.fdi_link_train = ilk_fdi_link_train;
} else if (IS_SANDYBRIDGE(dev_priv)) {
-   dev_priv->display.fdi_link_train = gen6_fdi_link_train;
+   dev_priv->fdi_funcs.fdi_link_train = gen6_fdi_link_train;
} else if (IS_IVYBRIDGE(dev_priv)) {
/* FIXME: detect B0+ stepping and use auto training */
-   dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
+   dev_priv->fdi_funcs.fdi_link_train = ivb_manual_fdi_link_train;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 0f549187b2f3..8f2d8a3888c2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -388,6 +388,11 @@ struct intel_hotplug_funcs {
void (*hpd_irq_setup)(struct drm_i915_private *dev_priv);
 };
 
+struct intel_fdi_funcs {
+   void (*fdi_link_train)(struct intel_crtc *crtc,
+  const struct intel_crtc_state *crtc_state);
+};
+
 struct drm_i915_display_funcs {
/* Returns the active state of the crtc, and if the crtc is active,
 * fills out the pipe-config with the hw state. */
@@ -403,9 +408,6 @@ struct drm_i915_display_funcs {
void (*commit_modeset_enables)(struct intel_atomic_state *state);
void (*commit_modeset_disables)(struct intel_atomic_state *state);
 
-   void (*fdi_link_train)(struct intel_crtc *crtc,
-  const struct intel_crtc_state *crtc_state);
-
/* clock updates for mode set */
/* cursor updates */
/* render clock increase/decrease */
@@ -985,6 +987,9 @@ struct drm_i915_private {
/* irq display functions */
struct intel_hotplug_funcs hotplug_funcs;
 
+   /* fdi display functions */
+   struct intel_fdi_funcs fdi_funcs;
+
/* Display functions */
struct drm_i915_display_funcs display;
 
-- 
2.30.2



[Intel-gfx] [PATCH 14/24] drm/i915: split the dpll clock compute out from display vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This single function might be possible to merge later, but
for now it's simple to just split it out.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_display.c |  6 +++---
 drivers/gpu/drm/i915/display/intel_dpll.c| 16 
 drivers/gpu/drm/i915/i915_drv.h  |  8 +++-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display.c 
b/drivers/gpu/drm/i915/display/intel_display.c
index 6ed32100924c..49d7f0468002 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -6821,10 +6821,10 @@ static int intel_crtc_atomic_check(struct 
intel_atomic_state *state,
crtc_state->update_wm_post = true;
 
if (mode_changed && crtc_state->hw.enable &&
-   dev_priv->display.crtc_compute_clock &&
+   dev_priv->dpll_funcs.crtc_compute_clock &&
!crtc_state->bigjoiner_slave &&
!drm_WARN_ON(&dev_priv->drm, crtc_state->shared_dpll)) {
-   ret = dev_priv->display.crtc_compute_clock(crtc_state);
+   ret = dev_priv->dpll_funcs.crtc_compute_clock(crtc_state);
if (ret)
return ret;
}
@@ -8851,7 +8851,7 @@ static void intel_modeset_clear_plls(struct 
intel_atomic_state *state)
struct intel_crtc *crtc;
int i;
 
-   if (!dev_priv->display.crtc_compute_clock)
+   if (!dev_priv->dpll_funcs.crtc_compute_clock)
return;
 
for_each_new_intel_crtc_in_state(state, crtc, new_crtc_state, i) {
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c 
b/drivers/gpu/drm/i915/display/intel_dpll.c
index 210f91f4a576..9326c7cbb05c 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -1367,21 +1367,21 @@ void
 intel_dpll_init_clock_hook(struct drm_i915_private *dev_priv)
 {
if (DISPLAY_VER(dev_priv) >= 9 || HAS_DDI(dev_priv))
-   dev_priv->display.crtc_compute_clock = hsw_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
hsw_crtc_compute_clock;
else if (HAS_PCH_SPLIT(dev_priv))
-   dev_priv->display.crtc_compute_clock = ilk_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
ilk_crtc_compute_clock;
else if (IS_CHERRYVIEW(dev_priv))
-   dev_priv->display.crtc_compute_clock = chv_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
chv_crtc_compute_clock;
else if (IS_VALLEYVIEW(dev_priv))
-   dev_priv->display.crtc_compute_clock = vlv_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
vlv_crtc_compute_clock;
else if (IS_G4X(dev_priv))
-   dev_priv->display.crtc_compute_clock = g4x_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
g4x_crtc_compute_clock;
else if (IS_PINEVIEW(dev_priv))
-   dev_priv->display.crtc_compute_clock = pnv_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
pnv_crtc_compute_clock;
else if (DISPLAY_VER(dev_priv) != 2)
-   dev_priv->display.crtc_compute_clock = i9xx_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
i9xx_crtc_compute_clock;
else
-   dev_priv->display.crtc_compute_clock = i8xx_crtc_compute_clock;
+   dev_priv->dpll_funcs.crtc_compute_clock = 
i8xx_crtc_compute_clock;
 }
 
 static bool i9xx_has_pps(struct drm_i915_private *dev_priv)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8f2d8a3888c2..51bbff28bb12 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -393,6 +393,10 @@ struct intel_fdi_funcs {
   const struct intel_crtc_state *crtc_state);
 };
 
+struct intel_dpll_funcs {
+   int (*crtc_compute_clock)(struct intel_crtc_state *crtc_state);
+};
+
 struct drm_i915_display_funcs {
/* Returns the active state of the crtc, and if the crtc is active,
 * fills out the pipe-config with the hw state. */
@@ -400,7 +404,6 @@ struct drm_i915_display_funcs {
struct intel_crtc_state *);
void (*get_initial_plane_config)(struct intel_crtc *,
 struct intel_initial_plane_config *);
-   int (*crtc_compute_clock)(struct intel_crtc_state *crtc_state);
void (*crtc_enable)(struct intel_atomic_state *state,
struct intel_crtc *crtc);
void (*crtc_disable)(struct intel_atomic_state *state,
@@ -990,6 +993,9 @@ struct drm_i915_private {
/* fdi display functions */
struct intel_fdi_funcs fdi_funcs;
 
+   /* display pll funcs */
+   struct intel_dpll_funcs dpll_funcs

[Intel-gfx] [PATCH 15/24] drm/i915: constify fdi link training vtable

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

Put the vtable into ro memory.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_fdi.c | 20 
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c 
b/drivers/gpu/drm/i915/display/intel_fdi.c
index 94bb7e039fe7..148fb50035ff 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -15,7 +15,7 @@ void intel_fdi_link_train(struct intel_crtc *crtc,
 {
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 
-   dev_priv->fdi_funcs.fdi_link_train(crtc, crtc_state);
+   dev_priv->fdi_funcs->fdi_link_train(crtc, crtc_state);
 }
 
 /* units of 100MHz */
@@ -1013,15 +1013,27 @@ void lpt_fdi_program_mphy(struct drm_i915_private 
*dev_priv)
intel_sbi_write(dev_priv, 0x21EC, tmp, SBI_MPHY);
 }
 
+static const struct intel_fdi_funcs ilk_funcs = {
+   .fdi_link_train = ilk_fdi_link_train,
+};
+
+static const struct intel_fdi_funcs gen6_funcs = {
+   .fdi_link_train = gen6_fdi_link_train,
+};
+
+static const struct intel_fdi_funcs ivb_funcs = {
+   .fdi_link_train = ivb_manual_fdi_link_train,
+};
+
 void
 intel_fdi_init_hook(struct drm_i915_private *dev_priv)
 {
if (IS_IRONLAKE(dev_priv)) {
-   dev_priv->fdi_funcs.fdi_link_train = ilk_fdi_link_train;
+   dev_priv->fdi_funcs = &ilk_funcs;
} else if (IS_SANDYBRIDGE(dev_priv)) {
-   dev_priv->fdi_funcs.fdi_link_train = gen6_fdi_link_train;
+   dev_priv->fdi_funcs = &gen6_funcs;
} else if (IS_IVYBRIDGE(dev_priv)) {
/* FIXME: detect B0+ stepping and use auto training */
-   dev_priv->fdi_funcs.fdi_link_train = ivb_manual_fdi_link_train;
+   dev_priv->fdi_funcs = &ivb_funcs;
}
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 51bbff28bb12..4cea7fa274f1 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -991,7 +991,7 @@ struct drm_i915_private {
struct intel_hotplug_funcs hotplug_funcs;
 
/* fdi display functions */
-   struct intel_fdi_funcs fdi_funcs;
+   const struct intel_fdi_funcs *fdi_funcs;
 
/* display pll funcs */
struct intel_dpll_funcs dpll_funcs;
-- 
2.30.2



[Intel-gfx] [PATCH 16/24] drm/i915: constify hotplug function vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

Use a macro to avoid mistakes; this type of macro is only used
in a couple of places.

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_hotplug.c |  4 +--
 drivers/gpu/drm/i915/i915_drv.h  |  2 +-
 drivers/gpu/drm/i915/i915_irq.c  | 28 +++-
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c 
b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 05f76aba4f8a..3c1cec953b42 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -215,8 +215,8 @@ intel_hpd_irq_storm_switch_to_polling(struct 
drm_i915_private *dev_priv)
 
 static void intel_hpd_irq_setup(struct drm_i915_private *i915)
 {
-   if (i915->display_irqs_enabled && i915->hotplug_funcs.hpd_irq_setup)
-   i915->hotplug_funcs.hpd_irq_setup(i915);
+   if (i915->display_irqs_enabled && i915->hotplug_funcs->hpd_irq_setup)
+   i915->hotplug_funcs->hpd_irq_setup(i915);
 }
 
 static void intel_hpd_irq_storm_reenable_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4cea7fa274f1..0870c8b561c3 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -988,7 +988,7 @@ struct drm_i915_private {
struct drm_i915_wm_disp_funcs wm_disp;
 
/* irq display functions */
-   struct intel_hotplug_funcs hotplug_funcs;
+   const struct intel_hotplug_funcs *hotplug_funcs;
 
/* fdi display functions */
const struct intel_fdi_funcs *fdi_funcs;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index c35065f8f429..77680bca46ee 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4345,6 +4345,20 @@ static irqreturn_t i965_irq_handler(int irq, void *arg)
return ret;
 }
 
+#define HPD_FUNCS(platform) \
+static const struct intel_hotplug_funcs platform##_hpd_funcs = { \
+   .hpd_irq_setup = platform##_hpd_irq_setup,   \
+}
+
+HPD_FUNCS(i915);
+HPD_FUNCS(dg1);
+HPD_FUNCS(gen11);
+HPD_FUNCS(bxt);
+HPD_FUNCS(icp);
+HPD_FUNCS(spt);
+HPD_FUNCS(ilk);
+#undef HPD_FUNCS
+
 /**
  * intel_irq_init - initializes irq support
  * @dev_priv: i915 device instance
@@ -4395,20 +4409,20 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 
if (HAS_GMCH(dev_priv)) {
if (I915_HAS_HOTPLUG(dev_priv))
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
i915_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &i915_hpd_funcs;
} else {
if (HAS_PCH_DG1(dev_priv))
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
dg1_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &dg1_hpd_funcs;
else if (DISPLAY_VER(dev_priv) >= 11)
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
gen11_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &gen11_hpd_funcs;
else if (IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv))
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
bxt_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &bxt_hpd_funcs;
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_ICP)
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
icp_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &icp_hpd_funcs;
else if (INTEL_PCH_TYPE(dev_priv) >= PCH_SPT)
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
spt_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &spt_hpd_funcs;
else
-   dev_priv->hotplug_funcs.hpd_irq_setup = 
ilk_hpd_irq_setup;
+   dev_priv->hotplug_funcs = &ilk_hpd_funcs;
}
 }
 
-- 
2.30.2



[Intel-gfx] [PATCH 17/24] drm/i915: constify color function vtable.

2021-09-14 Thread Jani Nikula
From: Dave Airlie 

This clarifies quite well what functions get used on what platforms
instead of having to decipher the old tree.

v2: fixed IVB mistake (Jani)

Reviewed-by: Jani Nikula 
Signed-off-by: Dave Airlie 
Signed-off-by: Jani Nikula 
---
 drivers/gpu/drm/i915/display/intel_color.c | 138 ++---
 drivers/gpu/drm/i915/i915_drv.h|   2 +-
 2 files changed, 93 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_color.c 
b/drivers/gpu/drm/i915/display/intel_color.c
index ed79075158dd..f5923f1c38bd 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -1137,14 +1137,14 @@ void intel_color_load_luts(const struct 
intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   dev_priv->color_funcs.load_luts(crtc_state);
+   dev_priv->color_funcs->load_luts(crtc_state);
 }
 
 void intel_color_commit(const struct intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   dev_priv->color_funcs.color_commit(crtc_state);
+   dev_priv->color_funcs->color_commit(crtc_state);
 }
 
 static bool intel_can_preload_luts(const struct intel_crtc_state 
*new_crtc_state)
@@ -1200,15 +1200,15 @@ int intel_color_check(struct intel_crtc_state 
*crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   return dev_priv->color_funcs.color_check(crtc_state);
+   return dev_priv->color_funcs->color_check(crtc_state);
 }
 
 void intel_color_get_config(struct intel_crtc_state *crtc_state)
 {
struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
 
-   if (dev_priv->color_funcs.read_luts)
-   dev_priv->color_funcs.read_luts(crtc_state);
+   if (dev_priv->color_funcs->read_luts)
+   dev_priv->color_funcs->read_luts(crtc_state);
 }
 
 static bool need_plane_update(struct intel_plane *plane,
@@ -2092,6 +2092,76 @@ static void icl_read_luts(struct intel_crtc_state *crtc_state)
}
 }
 
+static const struct intel_color_funcs chv_color_funcs = {
+   .color_check = chv_color_check,
+   .color_commit = i9xx_color_commit,
+   .load_luts = chv_load_luts,
+   .read_luts = chv_read_luts,
+};
+
+static const struct intel_color_funcs i965_color_funcs = {
+   .color_check = i9xx_color_check,
+   .color_commit = i9xx_color_commit,
+   .load_luts = i965_load_luts,
+   .read_luts = i965_read_luts,
+};
+
+static const struct intel_color_funcs i9xx_color_funcs = {
+   .color_check = i9xx_color_check,
+   .color_commit = i9xx_color_commit,
+   .load_luts = i9xx_load_luts,
+   .read_luts = i9xx_read_luts,
+};
+
+static const struct intel_color_funcs icl_color_funcs = {
+   .color_check = icl_color_check,
+   .color_commit = skl_color_commit,
+   .load_luts = icl_load_luts,
+   .read_luts = icl_read_luts,
+};
+
+static const struct intel_color_funcs glk_color_funcs = {
+   .color_check = glk_color_check,
+   .color_commit = skl_color_commit,
+   .load_luts = glk_load_luts,
+   .read_luts = glk_read_luts,
+};
+
+static const struct intel_color_funcs skl_color_funcs = {
+   .color_check = ivb_color_check,
+   .color_commit = skl_color_commit,
+   .load_luts = bdw_load_luts,
+   .read_luts = NULL,
+};
+
+static const struct intel_color_funcs bdw_color_funcs = {
+   .color_check = ivb_color_check,
+   .color_commit = hsw_color_commit,
+   .load_luts = bdw_load_luts,
+   .read_luts = NULL,
+};
+
+static const struct intel_color_funcs hsw_color_funcs = {
+   .color_check = ivb_color_check,
+   .color_commit = hsw_color_commit,
+   .load_luts = ivb_load_luts,
+   .read_luts = NULL,
+};
+
+static const struct intel_color_funcs ivb_color_funcs = {
+   .color_check = ivb_color_check,
+   .color_commit = ilk_color_commit,
+   .load_luts = ivb_load_luts,
+   .read_luts = NULL,
+};
+
+static const struct intel_color_funcs ilk_color_funcs = {
+   .color_check = ilk_color_check,
+   .color_commit = ilk_color_commit,
+   .load_luts = ilk_load_luts,
+   .read_luts = ilk_read_luts,
+};
+
 void intel_color_init(struct intel_crtc *crtc)
 {
struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
@@ -2101,52 +2171,28 @@ void intel_color_init(struct intel_crtc *crtc)
 
if (HAS_GMCH(dev_priv)) {
if (IS_CHERRYVIEW(dev_priv)) {
-   dev_priv->color_funcs.color_check = chv_color_check;
-   dev_priv->color_funcs.color_commit = i9xx_color_commit;
-   dev_priv->color_funcs.load_luts = chv_load_luts;
-   dev_priv->color_funcs.read_luts = chv_read_luts;
+   dev_priv->color_funcs = &chv_color_funcs;
} else if (DISPLAY_VER(dev_p

  1   2   >