Re: [Intel-gfx] [PATCH v1 2/4] drm/i915: Add missing mask when reading GEN12_DSMBASE

2022-09-19 Thread Caz Yokoyama
Reviewed-by:  Caz Yokoyama 
Better to be safe.


On Thu, Sep 15, 2022 at 1:40 PM Lucas De Marchi 
wrote:

> DSMBASE register is defined so BDSM bitfield contains the bits 63 to 20
> of the base address of stolen. For the supported platforms bits 0-19 are
> zero but that may not be true in future. Add the missing mask.
>
> Signed-off-by: Lucas De Marchi 
>
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> index 42f4769bb4ac..c34065fe2ecc 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
> @@ -814,7 +814,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private
> *i915, u16 type,
> return ERR_PTR(-ENXIO);
>
> /* Use DSM base address instead for stolen memory */
> -   dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
> +   dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) &
> GEN12_BDSM_MASK;
> if (IS_DG1(uncore->i915)) {
> lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
> if (WARN_ON(lmem_size < dsm_base))
> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> b/drivers/gpu/drm/i915/i915_reg.h
> index 1a9bd829fc7e..0301874c76ba 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -7953,6 +7953,7 @@ enum skl_power_gate {
>
>  #define GEN12_GSMBASE  _MMIO(0x108100)
>  #define GEN12_DSMBASE  _MMIO(0x1080C0)
> +#define   GEN12_BDSM_MASK  GENMASK(63, 20)
>
>  #define XEHP_CLOCK_GATE_DIS_MMIO(0x101014)
>  #define   SGSI_SIDECLK_DIS REG_BIT(17)
>
> --
> b4 0.10.0-dev-bbe61
>


-- 
-caz, caz at caztech dot com, 503-six one zero - five six nine nine(m)


Re: [Intel-gfx] [PATCH 12/23] drm/i915/mtl: Fix rawclk for Meteorlake PCH

2022-08-04 Thread Caz Yokoyama
> MTL has a fixed rawclk of 38400Mhz. Register does not need to be
> + * MTL always uses a 38.4 MHz rawclk.  The bspec tells us
Mismatch between commit message and comment. Probably
38400Mhz -> 38400kHz
-caz

On Mon, Aug 1, 2022 at 8:29 PM Matt Roper  wrote:

> On Wed, Jul 27, 2022 at 06:34:09PM -0700, Radhakrishna Sripada wrote:
> > From: Clint Taylor 
> >
> > MTL has a fixed rawclk of 38400Mhz. Register does not need to be
> > reprogrammed.
> >
> > Bspec: 49304
> >
> > Signed-off-by: Clint Taylor 
> > ---
> >  drivers/gpu/drm/i915/display/intel_cdclk.c | 7 +++
> >  1 file changed, 7 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c
> b/drivers/gpu/drm/i915/display/intel_cdclk.c
> > index 86a22c3766e5..390a198b0011 100644
> > --- a/drivers/gpu/drm/i915/display/intel_cdclk.c
> > +++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
> > @@ -3036,6 +3036,13 @@ u32 intel_read_rawclk(struct drm_i915_private
> *dev_priv)
> >
> >   if (INTEL_PCH_TYPE(dev_priv) >= PCH_DG1)
> >   freq = dg1_rawclk(dev_priv);
> > + else if (INTEL_PCH_TYPE(dev_priv) >= PCH_MTP)
> > + /*
> > + * MTL always uses a 38.4 MHz rawclk.  The bspec tells us
>
> Indentation isn't quite right here.
>
> Patch is also missing your s-o-b.
>
> With those fixed,
>
> Reviewed-by: Matt Roper 
>
> > + * "RAWCLK_FREQ defaults to the values for 38.4 and does
> > + * not need to be programmed."
> > + */
> > + freq = 38400;
> >   else if (INTEL_PCH_TYPE(dev_priv) >= PCH_CNP)
> >   freq = cnp_rawclk(dev_priv);
> >   else if (HAS_PCH_SPLIT(dev_priv))
> > --
> > 2.25.1
> >
>
> --
> Matt Roper
> Graphics Software Engineer
> VTT-OSGC Platform Enablement
> Intel Corporation
>


-- 
-caz, caz at caztech dot com, 503-six one zero - five six nine nine(m)


Re: [Intel-gfx] [PATCH 15/23] drm/i915/mtl: Obtain SAGV values from MMIO instead of GT pcode mailbox

2022-08-04 Thread Caz Yokoyama
On Wed, Jul 27, 2022 at 6:34 PM Radhakrishna Sripada <
radhakrishna.srip...@intel.com> wrote:

> From Meteorlake, Latency Level, SAGV bloack time are read from
> LATENCY_SAGV register instead of the GT driver pcode mailbox. DDR type
> and QGV information are also tob read from Mem SS registers.
>
> Bspec: 49324, 64636
>
> Cc: Matt Roper 
> Original Author: Caz Yokoyama
> Signed-off-by: José Roberto de Souza 
> Signed-off-by: Radhakrishna Sripada 
> ---
>  drivers/gpu/drm/i915/display/intel_bw.c | 49 +++--
>  drivers/gpu/drm/i915/display/intel_bw.h |  9 +
>  drivers/gpu/drm/i915/i915_reg.h | 16 
>  drivers/gpu/drm/i915/intel_dram.c   | 41 -
>  drivers/gpu/drm/i915/intel_pm.c |  8 +++-
>  5 files changed, 110 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.c
> b/drivers/gpu/drm/i915/display/intel_bw.c
> index 79269d2c476b..8bbf47da1716 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.c
> +++ b/drivers/gpu/drm/i915/display/intel_bw.c
> @@ -15,11 +15,6 @@
>  #include "intel_pcode.h"
>  #include "intel_pm.h"
>
> -/* Parameters for Qclk Geyserville (QGV) */
> -struct intel_qgv_point {
> -   u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
> -};
> -
>  struct intel_psf_gv_point {
> u8 clk; /* clock in multiples of 16. MHz */
>  };
> @@ -137,6 +132,42 @@ int icl_pcode_restrict_qgv_points(struct
> drm_i915_private *dev_priv,
> return 0;
>  }
>
> +static int mtl_read_qgv_point_info(struct drm_i915_private *dev_priv,
>
No need to return value. i.e.

static void
mtl_read_qgv_point_info(struct drm_i915_private *dev_priv,
-caz

+  struct intel_qgv_point *sp, int point)
> +{
> +   u32 val, val2;
> +   u16 dclk;
> +
> +   val = intel_uncore_read(_priv->uncore,
> +   MTL_MEM_SS_INFO_QGV_POINT(point, 0));
> +   val2 = intel_uncore_read(_priv->uncore,
> +MTL_MEM_SS_INFO_QGV_POINT(point, 1));
> +   dclk = REG_FIELD_GET(MTL_DCLK_MASK, val);
> +   sp->dclk = DIV_ROUND_UP((16667 * dclk) +  500, 1000);
> +   sp->t_rp = REG_FIELD_GET(MTL_TRP_MASK, val);
> +   sp->t_rcd = REG_FIELD_GET(MTL_TRCD_MASK, val);
> +
> +   sp->t_rdpre = REG_FIELD_GET(MTL_TRDPRE_MASK, val2);
> +   sp->t_ras = REG_FIELD_GET(MTL_TRAS_MASK, val2);
> +
> +   sp->t_rc = sp->t_rp + sp->t_ras;
> +
> +   return 0;
> +}
> +
> +int
> +intel_read_qgv_point_info(struct drm_i915_private *dev_priv,
> + struct intel_qgv_point *sp,
> + int point)
> +{
> +   if (DISPLAY_VER(dev_priv) >= 14)
> +   return mtl_read_qgv_point_info(dev_priv, sp, point);
> +   else if (IS_DG1(dev_priv))
> +   return dg1_mchbar_read_qgv_point_info(dev_priv, sp, point);
> +   else
> +   return icl_pcode_read_qgv_point_info(dev_priv, sp, point);
> +}
> +
>  static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
>   struct intel_qgv_info *qi,
>   bool is_y_tile)
> @@ -193,11 +224,7 @@ static int icl_get_qgv_points(struct drm_i915_private
> *dev_priv,
> for (i = 0; i < qi->num_points; i++) {
> struct intel_qgv_point *sp = >points[i];
>
> -   if (IS_DG1(dev_priv))
> -   ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp,
> i);
> -   else
> -   ret = icl_pcode_read_qgv_point_info(dev_priv, sp,
> i);
> -
> +   ret = intel_read_qgv_point_info(dev_priv, sp, i);
> if (ret)
> return ret;
>
> @@ -560,7 +587,7 @@ void intel_bw_init_hw(struct drm_i915_private
> *dev_priv)
>
> if (IS_DG2(dev_priv))
> dg2_get_bw_info(dev_priv);
> -   else if (IS_ALDERLAKE_P(dev_priv))
> +   else if (DISPLAY_VER(dev_priv) >= 13 || IS_ALDERLAKE_P(dev_priv))
> tgl_get_bw_info(dev_priv, _sa_info);
> else if (IS_ALDERLAKE_S(dev_priv))
> tgl_get_bw_info(dev_priv, _sa_info);
> diff --git a/drivers/gpu/drm/i915/display/intel_bw.h
> b/drivers/gpu/drm/i915/display/intel_bw.h
> index cb7ee3a24a58..b4c6665b0cf0 100644
> --- a/drivers/gpu/drm/i915/display/intel_bw.h
> +++ b/drivers/gpu/drm/i915/display/intel_bw.h
> @@ -46,6 +46,11 @@ struct intel_bw_state {
> u8 num_active_planes[I915_MAX_PIPES];
>  };
>
> +/* Parameters for Qclk Geyserville (QGV) */
> +struct intel_qgv_point {
>

Re: [Intel-gfx] [PATCH v2 4/4] drm/i915/display: Remove MBUS joining invalid TODOs

2022-03-23 Thread Caz Yokoyama
On Tue, Mar 22, 2022 at 2:45 PM José Roberto de Souza 
wrote:

> skl_compute_ddb() will for a modeset in all pipes when MBUS joining
> changes between states, so all pipes will be disabled, have all
> MBUS related registers updated and then each pipe enabled.
>
I am not clear what you want to say here. Could you rephrase above 3 lines?


> So no vblank syncronization is necessary and here droping those TODOs.
>

  dropping
-caz


>
> Cc: Ville Syrjälä 
> Signed-off-by: José Roberto de Souza 
> ---
>  drivers/gpu/drm/i915/intel_pm.c | 5 -
>  1 file changed, 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c
> b/drivers/gpu/drm/i915/intel_pm.c
> index cf290bb704221..9ccf0f062862c 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -6066,7 +6066,6 @@ skl_compute_ddb(struct intel_atomic_state *state)
> return ret;
>
> if (old_dbuf_state->joined_mbus !=
> new_dbuf_state->joined_mbus) {
> -   /* TODO: Implement vblank synchronized MBUS
> joining changes */
> ret = intel_modeset_all_pipes(state);
> if (ret)
> return ret;
> @@ -8195,10 +8194,6 @@ static void update_mbus_pre_enable(struct
> intel_atomic_state *state)
> if (!HAS_MBUS_JOINING(dev_priv))
> return;
>
> -   /*
> -* TODO: Implement vblank synchronized MBUS joining changes.
> -* Must be properly coordinated with dbuf reprogramming.
> -*/
> if (dbuf_state->joined_mbus) {
> mbus_ctl = MBUS_HASHING_MODE_1x4 | MBUS_JOIN |
> MBUS_JOIN_PIPE_SELECT_NONE;
> --
> 2.35.1
>
>

-- 
-caz, caz at caztech dot com, 503-six one zero - five six nine nine(m)


Re: [Intel-gfx] [PATCH 16/21] drm/i915/adl_s: MCHBAR memory info registers are moved

2020-11-20 Thread Caz Yokoyama
On Fri, 2020-11-20 at 12:18 -0800, Lucas De Marchi wrote:
> On Tue, Nov 17, 2020 at 10:50:24AM -0800, Aditya Swarup wrote:
> > From: Caz Yokoyama 
> > 
> > The crwebview indicates on ADL-S that some of our MCHBAR
> > registers have moved from their traditional 0x50XX offsets to
> > new locations. The meaning and bit layout of the registers
> > remain same.
> > 
> > Cc: Lucas De Marchi 
> > Cc: Jani Nikula 
> > Cc: Ville Syrjälä 
> > Cc: Imre Deak 
> > Cc: Matt Roper 
> > Signed-off-by: Yokoyama, Caz 
> > Signed-off-by: Aditya Swarup 
> > ---
> > drivers/gpu/drm/i915/i915_reg.h   |  5 +
> > drivers/gpu/drm/i915/intel_dram.c | 18 +++---
> > 2 files changed, 20 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 4c8d0d84af6a..6abba59592f7 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -10863,6 +10863,8 @@ enum skl_power_gate {
> > #define  SKL_DRAM_DDR_TYPE_LPDDR3   (2 << 0)
> > #define  SKL_DRAM_DDR_TYPE_LPDDR4   (3 << 0)
> > 
> > +#define  ADLS_MAD_INTER_CHANNEL_0_0_0_MCHBAR
> > _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x6048)
> > +
> > #define SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN_MMIO(MCHBAR_MI
> > RROR_BASE_SNB + 0x500C)
> > #define SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN_MMIO(MCHBAR_MI
> > RROR_BASE_SNB + 0x5010)
> > #define  SKL_DRAM_S_SHIFT   16
> > @@ -10890,6 +10892,9 @@ enum skl_power_gate {
> > #define  CNL_DRAM_RANK_3(0x2 << 9)
> > #define  CNL_DRAM_RANK_4(0x3 << 9)
> > 
> > +#define ADLS_MAD_DIMM_CH0_0_0_0_MCHBAR _MMIO(MCHBAR_MI
> > RROR_BASE_SNB + 0x6054)
> > +#define ADLS_MAD_DIMM_CH1_0_0_0_MCHBAR _MMIO(MCHBAR_MI
> > RROR_BASE_SNB + 0x6058)
> > +
> > /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using
> > this register,
> >  * since on HSW we can't write to it using I915_WRITE. */
> > #define D_COMP_HSW  _MMIO(MCHBAR_MIRROR_BASE_SNB +
> > 0x5F0C)
> > diff --git a/drivers/gpu/drm/i915/intel_dram.c
> > b/drivers/gpu/drm/i915/intel_dram.c
> > index 4754296a250e..e7427e5f4130 100644
> > --- a/drivers/gpu/drm/i915/intel_dram.c
> > +++ b/drivers/gpu/drm/i915/intel_dram.c
> > @@ -184,13 +184,21 @@ skl_dram_get_channels_info(struct
> > drm_i915_private *i915)
> > u32 val;
> > int ret;
> > 
> > -   val = intel_uncore_read(>uncore,
> > +   if (IS_ALDERLAKE_S(i915))
> > +   val = intel_uncore_read(>uncore,
> > +   ADLS_MAD_DIMM_CH0_0_0_0_MCHBAR);
> > +   else
> > +   val = intel_uncore_read(>uncore,
> > SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
> > ret = skl_dram_get_channel_info(i915, , 0, val);
> > if (ret == 0)
> > dram_info->num_channels++;
> > 
> > -   val = intel_uncore_read(>uncore,
> > +   if (IS_ALDERLAKE_S(i915))
> > +   val = intel_uncore_read(>uncore,
> > +   ADLS_MAD_DIMM_CH1_0_0_0_MCHBAR);
> > +   else
> > +   val = intel_uncore_read(>uncore,
> > SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
> 
> probably better to:
> 
> u32 ch0, ch1;
> 
> and then keep the reads together in a single if/else chain.
> Or use i915_reg_t ch0_reg, ch1_reg
Agree/Better idea. When I worked for, I only concerned how to minimize
my patch and not think about whether the code is simple and readable. 
-caz

> 
> Lucas De Marchi
> 
> > ret = skl_dram_get_channel_info(i915, , 1, val);
> > if (ret == 0)
> > @@ -231,7 +239,11 @@ skl_get_dram_type(struct drm_i915_private
> > *i915)
> > {
> > u32 val;
> > 
> > -   val = intel_uncore_read(>uncore,
> > +   if (IS_ALDERLAKE_S(i915))
> > +   val = intel_uncore_read(>uncore,
> > +   ADLS_MAD_INTER_CHANNEL_0_0_0_MCHBAR);
> > +   else
> > +   val = intel_uncore_read(>uncore,
> > SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMA
> > IN);
> > 
> > switch (val & SKL_DRAM_DDR_TYPE_MASK) {
> > -- 
> > 2.27.0
> > 

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 01/23] drm/i915/rkl: Add RKL platform info and PCI ids

2020-05-01 Thread Caz Yokoyama
Matt,
This patch used to have version information such "v7: Add missing pipe
mask". Why they are removed? For power on?

Otherwise
Acked-by: Caz Yokoyama 
-caz

On Fri, 2020-05-01 at 10:07 -0700, Matt Roper wrote:
> Introduce the basic platform definition, macros, and PCI IDs.
> 
> Bspec: 44501
> Cc: Lucas De Marchi 
> Cc: Caz Yokoyama 
> Cc: Aditya Swarup 
> Signed-off-by: Matt Roper 
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  8 
>  drivers/gpu/drm/i915/i915_pci.c  | 10 ++
>  drivers/gpu/drm/i915/intel_device_info.c |  1 +
>  drivers/gpu/drm/i915/intel_device_info.h |  1 +
>  include/drm/i915_pciids.h|  9 +
>  5 files changed, 29 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> index 6af69555733e..1ba77283123d 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1406,6 +1406,7 @@ IS_SUBPLATFORM(const struct drm_i915_private
> *i915,
>  #define IS_ICELAKE(dev_priv) IS_PLATFORM(dev_priv, INTEL_ICELAKE)
>  #define IS_ELKHARTLAKE(dev_priv) IS_PLATFORM(dev_priv,
> INTEL_ELKHARTLAKE)
>  #define IS_TIGERLAKE(dev_priv)   IS_PLATFORM(dev_priv,
> INTEL_TIGERLAKE)
> +#define IS_ROCKETLAKE(dev_priv)  IS_PLATFORM(dev_priv,
> INTEL_ROCKETLAKE)
>  #define IS_HSW_EARLY_SDV(dev_priv) (IS_HASWELL(dev_priv) && \
>   (INTEL_DEVID(dev_priv) & 0xFF00) ==
> 0x0C00)
>  #define IS_BDW_ULT(dev_priv) \
> @@ -1514,6 +1515,13 @@ IS_SUBPLATFORM(const struct drm_i915_private
> *i915,
>  #define IS_TGL_REVID(p, since, until) \
>   (IS_TIGERLAKE(p) && IS_REVID(p, since, until))
>  
> +#define RKL_REVID_A0 0x0
> +#define RKL_REVID_B0 0x1
> +#define RKL_REVID_C0 0x4
> +
> +#define IS_RKL_REVID(p, since, until) \
> + (IS_ROCKETLAKE(p) && IS_REVID(p, since, until))
> +
>  #define IS_LP(dev_priv)  (INTEL_INFO(dev_priv)->is_lp)
>  #define IS_GEN9_LP(dev_priv) (IS_GEN(dev_priv, 9) &&
> IS_LP(dev_priv))
>  #define IS_GEN9_BC(dev_priv) (IS_GEN(dev_priv, 9) &&
> !IS_LP(dev_priv))
> diff --git a/drivers/gpu/drm/i915/i915_pci.c
> b/drivers/gpu/drm/i915/i915_pci.c
> index 1faf9d6ec0a4..5a470bab2214 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -863,6 +863,15 @@ static const struct intel_device_info tgl_info =
> {
>   BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0) |
> BIT(VCS2),
>  };
>  
> +static const struct intel_device_info rkl_info = {
> + GEN12_FEATURES,
> + PLATFORM(INTEL_ROCKETLAKE),
> + .pipe_mask = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C),
> + .require_force_probe = 1,
> + .engine_mask =
> + BIT(RCS0) | BIT(BCS0) | BIT(VECS0) | BIT(VCS0),
> +};
> +
>  #define GEN12_DGFX_FEATURES \
>   GEN12_FEATURES, \
>   .is_dgfx = 1
> @@ -941,6 +950,7 @@ static const struct pci_device_id pciidlist[] = {
>   INTEL_ICL_11_IDS(_info),
>   INTEL_EHL_IDS(_info),
>   INTEL_TGL_12_IDS(_info),
> + INTEL_RKL_IDS(_info),
>   {0, 0, 0}
>  };
>  MODULE_DEVICE_TABLE(pci, pciidlist);
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c
> b/drivers/gpu/drm/i915/intel_device_info.c
> index 91bb7891c70c..9862c1185059 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -61,6 +61,7 @@ static const char * const platform_names[] = {
>   PLATFORM_NAME(ICELAKE),
>   PLATFORM_NAME(ELKHARTLAKE),
>   PLATFORM_NAME(TIGERLAKE),
> + PLATFORM_NAME(ROCKETLAKE),
>  };
>  #undef PLATFORM_NAME
>  
> diff --git a/drivers/gpu/drm/i915/intel_device_info.h
> b/drivers/gpu/drm/i915/intel_device_info.h
> index 69c9257c6c6a..a126984cef7f 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.h
> +++ b/drivers/gpu/drm/i915/intel_device_info.h
> @@ -80,6 +80,7 @@ enum intel_platform {
>   INTEL_ELKHARTLAKE,
>   /* gen12 */
>   INTEL_TIGERLAKE,
> + INTEL_ROCKETLAKE,
>   INTEL_MAX_PLATFORMS
>  };
>  
> diff --git a/include/drm/i915_pciids.h b/include/drm/i915_pciids.h
> index 662d8351c87a..bc989de2aac2 100644
> --- a/include/drm/i915_pciids.h
> +++ b/include/drm/i915_pciids.h
> @@ -605,4 +605,13 @@
>   INTEL_VGA_DEVICE(0x9AD9, info), \
>   INTEL_VGA_DEVICE(0x9AF8, info)
>  
> +/* RKL */
> +#define INTEL_RKL_IDS(info) \
> + INTEL_VGA_DEVICE(0x4C80, info), \
> + INTEL_VGA_DEVICE(0x4C8A, info), \
> + INTEL_VGA_DEVICE(0x4C8B, info), \
> + INTEL_VGA_DEVICE(0x4C8C, info), \
> + INTEL_VGA_DEVICE(0x4C90, info), \
> + INTEL_VGA_DEVICE(0x4C9A, info)
> +
>  #endif /* _I915_PCIIDS_H */

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/1] Revert "drm/i915/tgl: Add extra hdc flush workaround"

2020-03-04 Thread Caz Yokoyama
This reverts commit 36a6b5d964d995b536b1925ec42052ee40ba92c4.
Fixes: 36a6b5d964d9 ("drm/i915/tgl: Add extra hdc flush workaround")

The commit takes care Wa_1604544889 which was fixed on a0 stepping based on
a0 replan. So no SW workaround is required on any stepping now.

Reviewed-by: Matt Roper 
Signed-off-by: Caz Yokoyama 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b9b3f78f1324..f9425e5ed7ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4145,26 +4145,6 @@ static int gen12_emit_flush_render(struct i915_request 
*request,
 
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
-
-   /*
-* Wa_1604544889:tgl
-*/
-   if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
-   flags = 0;
-   flags |= PIPE_CONTROL_CS_STALL;
-   flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
-
-   flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-   flags |= PIPE_CONTROL_QW_WRITE;
-
-   cs = intel_ring_begin(request, 6);
-   if (IS_ERR(cs))
-   return PTR_ERR(cs);
-
-   cs = gen8_emit_pipe_control(cs, flags,
-   LRC_PPHWSP_SCRATCH_ADDR);
-   intel_ring_advance(request, cs);
-   }
}
 
return 0;
-- 
2.21.0.5.gaeb582a983

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/1] Revert "drm/i915/tgl: Add extra hdc flush workaround"

2020-03-04 Thread Caz Yokoyama
This reverts commit 36a6b5d964d995b536b1925ec42052ee40ba92c4.

The commit takes care Wa_1604544889 which was fixed on a0 stepping based on
a0 replan. So no SW workaround is required on any stepping now.

Signed-off-by: Caz Yokoyama 
---
 drivers/gpu/drm/i915/gt/intel_lrc.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index b9b3f78f1324..f9425e5ed7ea 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -4145,26 +4145,6 @@ static int gen12_emit_flush_render(struct i915_request 
*request,
 
*cs++ = preparser_disable(false);
intel_ring_advance(request, cs);
-
-   /*
-* Wa_1604544889:tgl
-*/
-   if (IS_TGL_REVID(request->i915, TGL_REVID_A0, TGL_REVID_A0)) {
-   flags = 0;
-   flags |= PIPE_CONTROL_CS_STALL;
-   flags |= PIPE_CONTROL_HDC_PIPELINE_FLUSH;
-
-   flags |= PIPE_CONTROL_STORE_DATA_INDEX;
-   flags |= PIPE_CONTROL_QW_WRITE;
-
-   cs = intel_ring_begin(request, 6);
-   if (IS_ERR(cs))
-   return PTR_ERR(cs);
-
-   cs = gen8_emit_pipe_control(cs, flags,
-   LRC_PPHWSP_SCRATCH_ADDR);
-   intel_ring_advance(request, cs);
-   }
}
 
return 0;
-- 
2.21.0.5.gaeb582a983

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/1] drm/i915: MCHBAR memory info registers are moved since GEN 12.

2020-02-14 Thread Caz Yokoyama
On Thu, 2020-02-13 at 14:37 -0800, Matt Roper wrote:
> On Tue, Feb 11, 2020 at 10:11:42AM -0800, Caz Yokoyama wrote:
> > From: "Yokoyama, Caz" 
> > 
> > MAD_INTER_CHANNEL_0_0_0_MCHBAR:
> > code nameoffset   DRAM_DDR_TYPE
> > SKL  0x5000   1:0 DDR4/DDR3/LPDDR3
> > TGL  0x6048/0x6248/0xd800 2:0
> > DDR4/DDR3/LPDDR3/LPDDR4
> > ICL  0x5000/0x6048/0x48   2:0
> > DDR4/DDR3/LPDDR3/LPDDR4
> > EHL  0x5000/0x60482:0
> > DDR4/DDR3/LPDDR3/LPDDR4
> > CNL  0x5000   2:0
> > DDR4/DDR3/LPDDR3/LPDDR4
> > 
> > MAD_DIMM_CH0/1_0_0_0_MCHBAR:
> > code name  offset CH0/1
> > SKL0x500c/0x5010
> > TGL0x6054/0x6058
> > EHL0x500c/0x5010
> > ICL0x500c/0x5010
ICL0x500c/0x5010/0x6054/0x6058
as I editted in jira.

> > The bit definition of MAD_DIMM_CH0/1_0_0_0_MCHBAR is same between
> > CNL and TGL.
> 
> Have you actually sanity checked the register addresses here on real
> hardware?  I see the same offsets in the doc as what you've put here,
I expect CI does, i.e. CI builds the driver with my patch and run tests
for all platform. I only run the test on adls simics.
Are you asking me to run tests on several platform? I have i5-8600K as
a test machine.
 
> but since this is a different register database than we get most of
> our
> gfx-specific register details from, it would still be good to double
> check that you do indeed get sensible values back when reading from
> these addresses before we merge the patch.  Especially since the
> database indicates that some of these registers are present at
> multiple
> offsets.
So you are asking me to read value of the following registers on ICL
for example and find whether they have the same value, correct?

0x5000 and 0x6048
0x500c and 0x6054
0x5010 and 0x6058

> 
> > 
> > P_CR_MC_BIOS_REQ_0_0_0 is same on BXT and GLK in terms of its
> > address and
> > bit definition.
> > BXT_D_CR_DRP0_DUNIT accesses offset 0x1000, 0x1200, 0x1400, 0x1600.
> > Its register name is RAM_ACCESS_DATA1. There is no difference
> > between
> > BXT and GLK in terms of its address and bit definition.
> > 
> > Cc: Ville Syrjälä 
> > Cc: Matt Roper 
> > Signed-off-by: Yokoyama, Caz 
> > ---
> >  drivers/gpu/drm/i915/i915_drv.c | 15 ---
> >  drivers/gpu/drm/i915/i915_reg.h |  6 ++
> >  2 files changed, 18 insertions(+), 3 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/i915/i915_drv.c
> > b/drivers/gpu/drm/i915/i915_drv.c
> > index 516536234e97..b9418583e503 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.c
> > +++ b/drivers/gpu/drm/i915/i915_drv.c
> > @@ -806,12 +806,18 @@ skl_dram_get_channels_info(struct
> > drm_i915_private *dev_priv)
> > u32 val;
> > int ret;
> >  
> > -   val = I915_READ(SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
> > +   if (INTEL_GEN(dev_priv) >= 12)
> > +   val = I915_READ(TGL_MAD_DIMM_CH0_0_0_0_MCHBAR);
> > +   else
> > +   val = I915_READ(SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
> > ret = skl_dram_get_channel_info(dev_priv, , 0, val);
> > if (ret == 0)
> > dram_info->num_channels++;
> >  
> > -   val = I915_READ(SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
> > +   if (INTEL_GEN(dev_priv) >= 12)
> > +   val = I915_READ(TGL_MAD_DIMM_CH1_0_0_0_MCHBAR);
> > +   else
> > +   val = I915_READ(SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
> > ret = skl_dram_get_channel_info(dev_priv, , 1, val);
> > if (ret == 0)
> > dram_info->num_channels++;
> > @@ -852,7 +858,10 @@ skl_get_dram_type(struct drm_i915_private
> > *dev_priv)
> >  {
> > u32 val;
> >  
> > -   val = I915_READ(SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN);
> > +   if (INTEL_GEN(dev_priv) >= 12)
> > +   val = I915_READ(TGL_MAD_INTER_CHANNEL_0_0_0_MCHBAR);
> > +   else
> > +   val =
> > I915_READ(SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN);
> >  
> > switch (val & SKL_DRAM_DDR_TYPE_MASK) {
> > case SKL_DRAM_DDR_TYPE_DDR3:
> 
> I'm not sure if it might be cleaner to create regs structures for
> each
> platform to centralize the platform-selection logic and avoid all the
> if/else in the code.  E.g.,
> 
> struct i915_mchbar_regs {
> i915_reg_t mad_inter_channel;
> i91

[Intel-gfx] [PATCH 1/1] drm/i915: MCHBAR memory info registers are moved since GEN 12.

2020-02-11 Thread Caz Yokoyama
From: "Yokoyama, Caz" 

MAD_INTER_CHANNEL_0_0_0_MCHBAR:
code nameoffset   DRAM_DDR_TYPE
SKL  0x5000   1:0 DDR4/DDR3/LPDDR3
TGL  0x6048/0x6248/0xd800 2:0 DDR4/DDR3/LPDDR3/LPDDR4
ICL  0x5000/0x6048/0x48   2:0 DDR4/DDR3/LPDDR3/LPDDR4
EHL  0x5000/0x60482:0 DDR4/DDR3/LPDDR3/LPDDR4
CNL  0x5000   2:0 DDR4/DDR3/LPDDR3/LPDDR4

MAD_DIMM_CH0/1_0_0_0_MCHBAR:
code name  offset CH0/1
SKL0x500c/0x5010
TGL0x6054/0x6058
EHL0x500c/0x5010
ICL0x500c/0x5010
The bit definition of MAD_DIMM_CH0/1_0_0_0_MCHBAR is same between
CNL and TGL.

P_CR_MC_BIOS_REQ_0_0_0 is same on BXT and GLK in terms of its address and
bit definition.
BXT_D_CR_DRP0_DUNIT accesses offset 0x1000, 0x1200, 0x1400, 0x1600.
Its register name is RAM_ACCESS_DATA1. There is no difference between
BXT and GLK in terms of its address and bit definition.

Cc: Ville Syrjälä 
Cc: Matt Roper 
Signed-off-by: Yokoyama, Caz 
---
 drivers/gpu/drm/i915/i915_drv.c | 15 ---
 drivers/gpu/drm/i915/i915_reg.h |  6 ++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 516536234e97..b9418583e503 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -806,12 +806,18 @@ skl_dram_get_channels_info(struct drm_i915_private 
*dev_priv)
u32 val;
int ret;
 
-   val = I915_READ(SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
+   if (INTEL_GEN(dev_priv) >= 12)
+   val = I915_READ(TGL_MAD_DIMM_CH0_0_0_0_MCHBAR);
+   else
+   val = I915_READ(SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN);
ret = skl_dram_get_channel_info(dev_priv, , 0, val);
if (ret == 0)
dram_info->num_channels++;
 
-   val = I915_READ(SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
+   if (INTEL_GEN(dev_priv) >= 12)
+   val = I915_READ(TGL_MAD_DIMM_CH1_0_0_0_MCHBAR);
+   else
+   val = I915_READ(SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN);
ret = skl_dram_get_channel_info(dev_priv, , 1, val);
if (ret == 0)
dram_info->num_channels++;
@@ -852,7 +858,10 @@ skl_get_dram_type(struct drm_i915_private *dev_priv)
 {
u32 val;
 
-   val = I915_READ(SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN);
+   if (INTEL_GEN(dev_priv) >= 12)
+   val = I915_READ(TGL_MAD_INTER_CHANNEL_0_0_0_MCHBAR);
+   else
+   val = I915_READ(SKL_MAD_INTER_CHANNEL_0_0_0_MCHBAR_MCMAIN);
 
switch (val & SKL_DRAM_DDR_TYPE_MASK) {
case SKL_DRAM_DDR_TYPE_DDR3:
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b09c1d6dc0aa..9f6ec44dad6e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -10508,6 +10508,9 @@ enum skl_power_gate {
 #define  SKL_DRAM_DDR_TYPE_LPDDR3  (2 << 0)
 #define  SKL_DRAM_DDR_TYPE_LPDDR4  (3 << 0)
 
+#define  TGL_MAD_INTER_CHANNEL_0_0_0_MCHBAR _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x6048)
+#define  TGL_DRAM_DDR_TYPE_WIO2(4 << 0)
+
 #define SKL_MAD_DIMM_CH0_0_0_0_MCHBAR_MCMAIN   _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x500C)
 #define SKL_MAD_DIMM_CH1_0_0_0_MCHBAR_MCMAIN   _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x5010)
 #define  SKL_DRAM_S_SHIFT  16
@@ -10535,6 +10538,9 @@ enum skl_power_gate {
 #define  CNL_DRAM_RANK_3   (0x2 << 9)
 #define  CNL_DRAM_RANK_4   (0x3 << 9)
 
+#define TGL_MAD_DIMM_CH0_0_0_0_MCHBAR  _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x6054)
+#define TGL_MAD_DIMM_CH1_0_0_0_MCHBAR  _MMIO(MCHBAR_MIRROR_BASE_SNB + 
0x6058)
+
 /* Please see hsw_read_dcomp() and hsw_write_dcomp() before using this 
register,
  * since on HSW we can't write to it using I915_WRITE. */
 #define D_COMP_HSW _MMIO(MCHBAR_MIRROR_BASE_SNB + 0x5F0C)
-- 
2.21.0.5.gaeb582a983

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/selftests: Fix plain use of integer 0 as NULL

2019-04-05 Thread Caz Yokoyama
Reviewed-by: Caz Yokoyama 

On Fri, 2019-04-05 at 12:38 +0100, Tvrtko Ursulin wrote:
> i915_vma_instance

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 1/2] drm/i915/selftests: Calculate maximum ring size for preemption chain

2019-03-21 Thread Caz Yokoyama
Reviewed-by: Yokoyama, Caz 
-caz

On Thu, 2019-03-21 at 18:42 +, Chris Wilson wrote:
> Quoting Chris Wilson (2019-03-21 18:38:53)
> > Quoting Caz Yokoyama (2019-03-21 18:41:10)
> > > inline
> > > -caz
> > > On Thu, 2019-03-21 at 07:37 +, Chris Wilson wrote:
> > > > 32 is too many for the likes of kbl, and in order to insert
> > > > that many
> > > 
> > > Not only kbl. ring_size is 25 on my cfl.
> > > 
> > > > requests into the ring requires us to declare the first few
> > > > hung --
> > > 
> > > The hung is not caused by 32. It is caused by accumulation of
> > > requests
> > > for all prime numbers.
> > 
> > Sure, but the design of the test is that we don't care for more
> > than
> > ring_size.
> 
> And you can't have more than ring_size requests without hanging on
> the
> spinner. If we pick the maximum that allows for a prime number larger
> than ring_size, it hangs irrespective of whether or not you remember
> to
> flush the lo.ctx in between.
> -Chris

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH 1/2] drm/i915/selftests: Calculate maximum ring size for preemption chain

2019-03-21 Thread Caz Yokoyama
inline
-caz
On Thu, 2019-03-21 at 07:37 +, Chris Wilson wrote:
> 32 is too many for the likes of kbl, and in order to insert that many
Not only kbl. ring_size is 25 on my cfl.

> requests into the ring requires us to declare the first few hung --
The hung is not caused by 32. It is caused by accumulation of requests
for all prime numbers.

> understandably a slow and unexpected process. Instead, measure the
> size
> of a singe requests and use that to estimate the upper bound on the
> chain length we can use for our test, remembering to flush the
> previous
> chain between tests for safety.
> 
> Signed-off-by: Chris Wilson 
> Cc: Tvrtko Ursulin 
> ---
>  drivers/gpu/drm/i915/selftests/intel_lrc.c | 40
> --
>  1 file changed, 37 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> index d61520ea03c1..42068ed5eec0 100644
> --- a/drivers/gpu/drm/i915/selftests/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/selftests/intel_lrc.c
> @@ -615,14 +615,33 @@ static int live_chain_preempt(void *arg)
>   struct i915_sched_attr attr = {
>   .priority =
> I915_USER_PRIORITY(I915_PRIORITY_MAX),
>   };
> - int count, i;
> + struct i915_request *rq;
> + int ring_size, count, i;
>  
>   if (!intel_engine_has_preemption(engine))
>   continue;
>  
> - for_each_prime_number_from(count, 1, 32) { /* must fit
> ring! */
> - struct i915_request *rq;
> + rq = igt_spinner_create_request(,
> + lo.ctx, engine,
> + MI_ARB_CHECK);
> + if (IS_ERR(rq))
> + goto err_wedged;
> + i915_request_add(rq);
> +
> + ring_size = rq->wa_tail - rq->head;
> + if (ring_size < 0)
> + ring_size += rq->ring->size;
> + ring_size = rq->ring->size / ring_size;
> + pr_debug("%s(%s): Using maximum of %d requests\n",
> +  __func__, engine->name, ring_size);
>  
> + igt_spinner_end();
> + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ / 2) <
> 0) {
> + pr_err("Timed out waiting to flush %s\n",
> engine->name);
> + goto err_wedged;
> + }
> +
> + for_each_prime_number_from(count, 1, ring_size) {
>   rq = igt_spinner_create_request(,
>   hi.ctx, engine,
>   MI_ARB_CHECK);
> @@ -664,6 +683,21 @@ static int live_chain_preempt(void *arg)
>   goto err_wedged;
>   }
>   igt_spinner_end();
> +
> + rq = i915_request_alloc(engine, lo.ctx);
> + if (IS_ERR(rq))
> + goto err_wedged;
> + i915_request_add(rq);
This request add is redundant. Wait for the last rq for lo.

> + if (i915_request_wait(rq, I915_WAIT_LOCKED, HZ
> / 5) < 0) {
> + struct drm_printer p =
> + drm_info_printer(i915-
> >drm.dev);
> +
> + pr_err("Failed to flush low priority
> chain of %d requests\n",
> +count);
> + intel_engine_dump(engine, ,
> +   "%s\n", engine-
> >name);
> + goto err_wedged;
> + }
>   }
>   }
>  

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH v4 1/1] i915_pm_rpm: remove gem-execbuf-stress-extra-wait because same as gem-execbuf-stress

2019-03-19 Thread Caz Yokoyama
Extra 5sec delay does not add any value more than gem-execbuf-stress.
It waits until suspend state after a job is added by gem_execbuf().
There is no need to do more when GPU becomes suspended state.
I confirm this by looking at pm_runtime_force_suspend() which exits
on suspend state.

Signed-off-by: Caz Yokoyama 
Cc: Chris Wilson 
---
 tests/i915/i915_pm_rpm.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/i915/i915_pm_rpm.c b/tests/i915/i915_pm_rpm.c
index be296f52..9a91ca0a 100644
--- a/tests/i915/i915_pm_rpm.c
+++ b/tests/i915/i915_pm_rpm.c
@@ -1338,12 +1338,13 @@ static void gem_execbuf_stress_subtest(int rounds, int 
wait_flags)
for (i = 0; i < rounds; i++) {
gem_execbuf(drm_fd, );
 
-   if (wait_flags & WAIT_STATUS)
+   if (wait_flags & WAIT_STATUS) {
+   /* clean up idle work */
+   igt_drop_caches_set(drm_fd, DROP_IDLE);
igt_assert(wait_for_suspended());
+   }
if (wait_flags & WAIT_PC8_RES)
igt_assert(pc8_plus_residency_changed(30));
-   if (wait_flags & WAIT_EXTRA)
-   sleep(5);
}
 
gem_close(drm_fd, handle);
@@ -2083,8 +2084,6 @@ int main(int argc, char *argv[])
gem_execbuf_stress_subtest(rounds, WAIT_STATUS);
igt_subtest("gem-execbuf-stress-pc8")
gem_execbuf_stress_subtest(rounds, WAIT_PC8_RES);
-   igt_subtest("gem-execbuf-stress-extra-wait")
-   gem_execbuf_stress_subtest(rounds, WAIT_STATUS | WAIT_EXTRA);
 
/* power-wake reference tests */
igt_subtest("pm-tiling")
-- 
2.17.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH] drm/i915: Stop needlessly acquiring wakeref for debugfs/drop_caches_set

2019-03-12 Thread Caz Yokoyama
The part of i915_drop_caches_set() is
Reviewed-by: Yokoyama, Caz 

While I don't know whether we don't really need wakeref, calling
intel_runtime_pm_get() makes GPU active state from suspended state for
example. It makes i915_pm_rpm@gem-execbuf-stress-extra-wait test
difficult to make it faster. Thank you.
-caz

On Tue, 2019-03-12 at 15:25 +, Chris Wilson wrote:
> We only need to acquire a wakeref for ourselves for a few operations,
> as
> most either already acquire their own wakeref or imply a wakeref. In
> particular, it is i915_gem_set_wedged() that needed us to present it
> with a wakeref, which is incongruous with its "use anywhere" ability.
> 
> Suggested-by: Yokoyama, Caz 
> Signed-off-by: Chris Wilson 
> Cc: Yokoyama, Caz 
> Cc: Mika Kuoppala 
> ---
>  drivers/gpu/drm/i915/i915_debugfs.c | 12 
>  drivers/gpu/drm/i915/i915_reset.c   |  4 +++-
>  2 files changed, 7 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c
> b/drivers/gpu/drm/i915/i915_debugfs.c
> index 6a90558de213..08683dca7775 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3888,12 +3888,9 @@ static int
>  i915_drop_caches_set(void *data, u64 val)
>  {
>   struct drm_i915_private *i915 = data;
> - intel_wakeref_t wakeref;
> - int ret = 0;
>  
>   DRM_DEBUG("Dropping caches: 0x%08llx [0x%08llx]\n",
> val, val & DROP_ALL);
> - wakeref = intel_runtime_pm_get(i915);
>  
>   if (val & DROP_RESET_ACTIVE &&
>   wait_for(intel_engines_are_idle(i915),
> I915_IDLE_ENGINES_TIMEOUT))
> @@ -3902,9 +3899,11 @@ i915_drop_caches_set(void *data, u64 val)
>   /* No need to check and wait for gpu resets, only libdrm auto-
> restarts
>* on ioctls on -EAGAIN. */
>   if (val & (DROP_ACTIVE | DROP_RETIRE | DROP_RESET_SEQNO)) {
> + int ret;
> +
>   ret = mutex_lock_interruptible(
> >drm.struct_mutex);
>   if (ret)
> - goto out;
> + return ret;
>  
>   if (val & DROP_ACTIVE)
>   ret = i915_gem_wait_for_idle(i915,
> @@ -3943,10 +3942,7 @@ i915_drop_caches_set(void *data, u64 val)
>   if (val & DROP_FREED)
>   i915_gem_drain_freed_objects(i915);
>  
> -out:
> - intel_runtime_pm_put(i915, wakeref);
> -
> - return ret;
> + return 0;
>  }
>  
>  DEFINE_SIMPLE_ATTRIBUTE(i915_drop_caches_fops,
> diff --git a/drivers/gpu/drm/i915/i915_reset.c
> b/drivers/gpu/drm/i915/i915_reset.c
> index 3c08e08837d0..955c22b8dfc7 100644
> --- a/drivers/gpu/drm/i915/i915_reset.c
> +++ b/drivers/gpu/drm/i915/i915_reset.c
> @@ -861,9 +861,11 @@ static void __i915_gem_set_wedged(struct
> drm_i915_private *i915)
>  void i915_gem_set_wedged(struct drm_i915_private *i915)
>  {
>   struct i915_gpu_error *error = >gpu_error;
> + intel_wakeref_t wakeref;
>  
>   mutex_lock(>wedge_mutex);
> - __i915_gem_set_wedged(i915);
> + with_intel_runtime_pm(i915, wakeref)
> + __i915_gem_set_wedged(i915);
>   mutex_unlock(>wedge_mutex);
>  }
>  

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH i-g-t] lib: Incrementally mlock()

2019-02-28 Thread Caz Yokoyama
Reviewed-by: Caz Yokoyama 

Correction of my test report: The patched test runs less than 7 sec on
my nuc. 10 times faster.
-caz

On Wed, 2019-02-27 at 16:29 +, Chris Wilson wrote:
> As we already have the previous portion of the mmap mlocked, we only
> need to mlock() the fresh portion for testing available memory.
> 
> v2: Fixup the uint64_t pointer arithmetric and only use a single mmap
> to
> avoid subsequent mlock fail (for reasons unknown, but bet on mm/).
> 
> Signed-off-by: Chris Wilson 
> Cc: Caz Yokoyama 
> ---
>  lib/igt_aux.h |  2 +-
>  lib/intel_os.c| 40 +--
> 
>  tests/eviction_common.c   | 17 +
>  tests/i915/i915_suspend.c | 17 +++--
>  4 files changed, 35 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> index fb02c026a..09b6246bf 100644
> --- a/lib/igt_aux.h
> +++ b/lib/igt_aux.h
> @@ -194,7 +194,7 @@ void intel_purge_vm_caches(int fd);
>  uint64_t intel_get_avail_ram_mb(void);
>  uint64_t intel_get_total_ram_mb(void);
>  uint64_t intel_get_total_swap_mb(void);
> -size_t intel_get_total_pinnable_mem(void);
> +void *intel_get_total_pinnable_mem(size_t *pinned);
>  
>  int __intel_check_memory(uint64_t count, uint64_t size, unsigned
> mode,
>uint64_t *out_required, uint64_t *out_total);
> diff --git a/lib/intel_os.c b/lib/intel_os.c
> index e1e31e230..dd93bea1a 100644
> --- a/lib/intel_os.c
> +++ b/lib/intel_os.c
> @@ -227,11 +227,9 @@ intel_get_total_swap_mb(void)
>   *
>   * Returns: Amount of memory that can be safely pinned, in bytes.
>   */
> -size_t
> -intel_get_total_pinnable_mem(void)
> +void *intel_get_total_pinnable_mem(size_t *total)
>  {
>   uint64_t *can_mlock, pin, avail;
> - size_t ret;
>  
>   pin = (intel_get_total_ram_mb() + 1) << 20;
>   avail = (intel_get_avail_ram_mb() + 1) << 20;
> @@ -245,34 +243,40 @@ intel_get_total_pinnable_mem(void)
>*/
>   *can_mlock = (avail >> 1) + (avail >> 2);
>   if (mlock(can_mlock, *can_mlock)) {
> - *can_mlock = 0;
> - goto out;
> + munmap(can_mlock, pin);
> + return MAP_FAILED;
>   }
>  
>   for (uint64_t inc = 1024 << 20; inc >= 4 << 10; inc >>= 2) {
> - igt_debug("Testing mlock %'"PRIu64" B (%'"PRIu64"
> MiB)\n",
> -   *can_mlock, *can_mlock >> 20);
> + uint64_t locked = *can_mlock;
> +
> + igt_debug("Testing mlock %'"PRIu64"B (%'"PRIu64"MiB) +
> %'"PRIu64"B\n",
> +   locked, locked >> 20, inc);
>  
>   igt_fork(child, 1) {
> - for (uint64_t bytes = *can_mlock;
> -  bytes <= pin;
> -  bytes += inc) {
> - if (mlock(can_mlock, bytes))
> + uint64_t bytes = *can_mlock;
> +
> + while (bytes <= pin) {
> + if (mlock((void *)can_mlock + bytes,
> inc))
>   break;
>  
> - *can_mlock = bytes;
> + *can_mlock = bytes += inc;
>   __sync_synchronize();
>   }
>   }
>   __igt_waitchildren();
> - igt_assert(!mlock(can_mlock, *can_mlock));
> - }
>  
> -out:
> - ret = *can_mlock;
> - munmap(can_mlock, pin);
> + if (*can_mlock > locked + inc) { /* Weird bit of mm/
> lore */
> + *can_mlock -= inc;
> + igt_debug("Claiming mlock %'"PRIu64"B
> (%'"PRIu64"MiB)\n",
> +   *can_mlock, *can_mlock >> 20);
> + igt_assert(!mlock((void *)can_mlock + locked,
> +   *can_mlock - locked));
> + }
> + }
>  
> - return ret;
> + *total = pin;
> + return can_mlock;
>  }
>  
>  static uint64_t vfs_file_max(void)
> diff --git a/tests/eviction_common.c b/tests/eviction_common.c
> index 321772ba7..a3b9e4167 100644
> --- a/tests/eviction_common.c
> +++ b/tests/eviction_common.c
> @@ -133,23 +133,24 @@ static void mlocked_evictions(int fd, struct
> igt_eviction_test_ops *ops,
> uint64_t surface_size,
> uint64_t surface_count)
>  {
> + uint64_t sz, pin, total;
>   void 

Re: [Intel-gfx] [PATCH i-g-t] lib: Incrementally mlock()

2019-02-27 Thread Caz Yokoyama
inline.

v2 patches fixes
- address calculation bug
- not killed
However, the test does not runs faster.
-caz

On Wed, 2019-02-27 at 16:29 +, Chris Wilson wrote:
> As we already have the previous portion of the mmap mlocked, we only
> need to mlock() the fresh portion for testing available memory.
> 
> v2: Fixup the uint64_t pointer arithmetric and only use a single mmap
> to
> avoid subsequent mlock fail (for reasons unknown, but bet on mm/).
> 
> Signed-off-by: Chris Wilson 
> Cc: Caz Yokoyama 
> ---
>  lib/igt_aux.h |  2 +-
>  lib/intel_os.c| 40 +--
> 
>  tests/eviction_common.c   | 17 +
>  tests/i915/i915_suspend.c | 17 +++--
>  4 files changed, 35 insertions(+), 41 deletions(-)
> 
> diff --git a/lib/igt_aux.h b/lib/igt_aux.h
> index fb02c026a..09b6246bf 100644
> --- a/lib/igt_aux.h
> +++ b/lib/igt_aux.h
> @@ -194,7 +194,7 @@ void intel_purge_vm_caches(int fd);
>  uint64_t intel_get_avail_ram_mb(void);
>  uint64_t intel_get_total_ram_mb(void);
>  uint64_t intel_get_total_swap_mb(void);
> -size_t intel_get_total_pinnable_mem(void);
> +void *intel_get_total_pinnable_mem(size_t *pinned);
>  
>  int __intel_check_memory(uint64_t count, uint64_t size, unsigned
> mode,
>uint64_t *out_required, uint64_t *out_total);
> diff --git a/lib/intel_os.c b/lib/intel_os.c
> index e1e31e230..dd93bea1a 100644
> --- a/lib/intel_os.c
> +++ b/lib/intel_os.c
> @@ -227,11 +227,9 @@ intel_get_total_swap_mb(void)
>   *
>   * Returns: Amount of memory that can be safely pinned, in bytes.
>   */
> -size_t
> -intel_get_total_pinnable_mem(void)
> +void *intel_get_total_pinnable_mem(size_t *total)
>  {
>   uint64_t *can_mlock, pin, avail;
> - size_t ret;
>  
>   pin = (intel_get_total_ram_mb() + 1) << 20;
>   avail = (intel_get_avail_ram_mb() + 1) << 20;
> @@ -245,34 +243,40 @@ intel_get_total_pinnable_mem(void)
>*/
>   *can_mlock = (avail >> 1) + (avail >> 2);
>   if (mlock(can_mlock, *can_mlock)) {
> - *can_mlock = 0;
> - goto out;
> + munmap(can_mlock, pin);
> + return MAP_FAILED;
>   }
>  
>   for (uint64_t inc = 1024 << 20; inc >= 4 << 10; inc >>= 2) {
> - igt_debug("Testing mlock %'"PRIu64" B (%'"PRIu64"
> MiB)\n",
> -   *can_mlock, *can_mlock >> 20);
> + uint64_t locked = *can_mlock;
> +
> + igt_debug("Testing mlock %'"PRIu64"B (%'"PRIu64"MiB) +
> %'"PRIu64"B\n",
> +   locked, locked >> 20, inc);
>  
>   igt_fork(child, 1) {
> - for (uint64_t bytes = *can_mlock;
> -  bytes <= pin;
> -  bytes += inc) {
> - if (mlock(can_mlock, bytes))
> + uint64_t bytes = *can_mlock;
> +
> + while (bytes <= pin) {
> + if (mlock((void *)can_mlock + bytes,
> inc))
>   break;
>  
> - *can_mlock = bytes;
> + *can_mlock = bytes += inc;
>   __sync_synchronize();
>   }
>   }
>   __igt_waitchildren();
> - igt_assert(!mlock(can_mlock, *can_mlock));
> - }
>  
> -out:
> - ret = *can_mlock;
> - munmap(can_mlock, pin);
> + if (*can_mlock > locked + inc) { /* Weird bit of mm/
> lore */
> + *can_mlock -= inc;
> 
Are you able to explain this? Is this for when child is killed during
updating *can_mlock?

If the condition is not met, parent does not mlock. Is this OK?

> + igt_debug("Claiming mlock %'"PRIu64"B
> (%'"PRIu64"MiB)\n",
> +   *can_mlock, *can_mlock >> 20);
> + igt_assert(!mlock((void *)can_mlock + locked,
> 
> +   *can_mlock - locked));
> + }
> + }
>  
> - return ret;
> + *total = pin;
> 
*total = *can_mlock?

Also you don't unmap. I mean map and unmap are not paired in the same
function. Not elegant. But c'est la vie.
-caz

> + return can_mlock;
>  }
>  
>  static uint64_t vfs_file_max(void)
> diff --git a/tests/eviction_common.c b/tests/eviction_common.c
> index 321772ba7..a3b9e4167 100644
> --- a/tests/eviction_common.c
> +++ b/test

Re: [Intel-gfx] [PATCH i-g-t] i915/gem_ctx_switch: Use minimum qlen over all engines and measure switches

2019-02-26 Thread Caz Yokoyama
Reviewed-by:  Caz Yokoyama 
-caz

On Mon, 2019-02-25 at 18:29 +, Chris Wilson wrote:
> Quoting Caz Yokoyama (2019-02-25 18:28:34)
> > Chris,
> > By your patch, measure_qlen() reports how many gem_execbuf() can be
> > executed(queue length) within timeout of the slowest engine,
> > correct?
> 
> More or less, yes.
> -Chris

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH i-g-t] i915/gem_ctx_switch: Use minimum qlen over all engines and measure switches

2019-02-25 Thread Caz Yokoyama
Chris,
By your patch, measure_qlen() reports how many gem_execbuf() can be
executed(queue length) within timeout of the slowest engine, correct?

Run time becomes 95 sec which is less than half.
-caz

On Sat, 2019-02-23 at 01:34 +, Chris Wilson wrote:
> Not all engines are created equal, and our weighting ends up
> favouring
> the many faster xCS rings at the expense of RCS. Our qlen estimation
> also failed to factor in the context switch overhead, which is a
> significant factor for nop batches. So we oversubscribe the number of
> batches submitted to RCS and end up waiting for those to complete at
> the
> end of our subtest timeslice.
> 
> Signed-off-by: Chris Wilson 
> Cc: Caz Yokoyama 
> ---
>  tests/i915/gem_ctx_switch.c | 39 +
> 
>  1 file changed, 31 insertions(+), 8 deletions(-)
> 
> diff --git a/tests/i915/gem_ctx_switch.c
> b/tests/i915/gem_ctx_switch.c
> index 1208cb8d7..87e13b915 100644
> --- a/tests/i915/gem_ctx_switch.c
> +++ b/tests/i915/gem_ctx_switch.c
> @@ -26,6 +26,7 @@
>   */
>  
>  #include "igt.h"
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -58,29 +59,50 @@ static int measure_qlen(int fd,
>  {
>   const struct drm_i915_gem_exec_object2 * const obj =
>   (struct drm_i915_gem_exec_object2 *)(uintptr_t)execbuf-
> >buffers_ptr;
> - int qlen = 64;
> + uint32_t ctx[64];
> + int min = INT_MAX, max = 0;
> +
> + for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> + ctx[i] = gem_context_create(fd);
>  
>   for (unsigned int n = 0; n < nengine; n++) {
>   uint64_t saved = execbuf->flags;
>   struct timespec tv = {};
> + int q;
>  
>   execbuf->flags |= engine[n];
>  
> - igt_nsec_elapsed();
> - for (int loop = 0; loop < qlen; loop++)
> + for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> + execbuf->rsvd1 = ctx[i];
>   gem_execbuf(fd, execbuf);
> + }
>   gem_sync(fd, obj->handle);
>  
> - execbuf->flags = saved;
> + igt_nsec_elapsed();
> + for (int i = 0; i < ARRAY_SIZE(ctx); i++) {
> + execbuf->rsvd1 = ctx[i];
> + gem_execbuf(fd, execbuf);
> + }
> + gem_sync(fd, obj->handle);
>  
>   /*
>* Be conservative and aim not to overshoot timeout, so
> scale
>* down by 8 for hopefully a max of 12.5% error.
>*/
> - qlen = qlen * timeout * 1e9 / igt_nsec_elapsed() / 8
> + 1;
> + q = ARRAY_SIZE(ctx) * timeout * 1e9 /
> igt_nsec_elapsed() / 8 + 1;
> + if (q < min)
> + min = q;
> + if (q > max)
> + max = q;
> +
> + execbuf->flags = saved;
>   }
>  
> - return qlen;
> + for (int i = 0; i < ARRAY_SIZE(ctx); i++)
> + gem_context_destroy(fd, ctx[i]);
> +
> + igt_debug("Estimated qlen: {min:%d, max:%d}\n", min, max);
> + return min;
>  }
>  
>  static void single(int fd, uint32_t handle,
> @@ -259,9 +281,10 @@ static void all(int fd, uint32_t handle,
> unsigned flags, int timeout)
>   clock_gettime(CLOCK_MONOTONIC, );
>   gem_close(fd, obj[0].handle);
>  
> - igt_info("[%d:%d] %s: %'u cycles:
> %.3fus%s\n",
> + igt_info("[%d:%d] %s: %'u cycles:
> %.3fus%s (elapsed: %.3fs)\n",
>nctx, child, name[child],
> count, elapsed(, )*1e6 / count,
> -  flags & INTERRUPTIBLE ? "
> (interruptible)" : "");
> +  flags & INTERRUPTIBLE ? "
> (interruptible)" : "",
> +  elapsed(, ));
>   }
>   igt_waitchildren();
>   }

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx