Re: [Intel-gfx] [PATCH 1/3] drm/i915/perf: rework aging tail workaround

2020-03-22 Thread Dixit, Ashutosh
On Sat, 21 Mar 2020 21:44:43 -0700, Dixit, Ashutosh wrote:
>
> Actually a couple of further improvements to the loop above are
> possible. First there is no reason to start at previous_tail, we can just
> start at the aligned hw_tail itself.

Unless we deliberately want to wait and delay declaring the data as valid,
i.e. we are really not sure what is happening and it seems "safer" to wait
for an extra report.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Fix inconsistent locked return

2022-08-30 Thread Dixit, Ashutosh
On Tue, 30 Aug 2022 08:02:29 -0700, Rodrigo Vivi wrote:
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index 9d49ccef03bb..f8a2bbcdf14f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -477,7 +477,7 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
> *slpc, u32 val)
>   if (unlikely(ret)) {
>   i915_probe_error(i915, "Failed to toggle efficient freq 
> (%pe)\n",
>ERR_PTR(ret));
> - return ret;
> + goto unlock;

I think leaking runtime_pm wakeref now...

>   }
>
>   ret = slpc_set_param(slpc,
> @@ -492,6 +492,7 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
> *slpc, u32 val)
>   if (!ret)
>   slpc->min_freq_softlimit = val;
>
> +unlock:
>   mutex_unlock(&slpc->lock);
>
>   return ret;
> --
> 2.37.2
>


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Fix PCODE IA Freq requests when using SLPC

2022-08-30 Thread Dixit, Ashutosh
On Fri, 26 Aug 2022 13:03:05 -0700, Dixit, Ashutosh wrote:
>
> On Fri, 26 Aug 2022 10:44:34 -0700, Rodrigo Vivi wrote:
> >
> > Fixes: 7ba79a671568 ("drm/i915/guc/slpc: Gate Host RPS when SLPC is 
> > enabled")
> > Cc:  # v5.15+
> > Cc: Ashutosh Dixit 
> > Tested-by: Sushma Venkatesh Reddy 
> > Signed-off-by: Rodrigo Vivi 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_llc.c | 24 
> >  1 file changed, 16 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_llc.c 
> > b/drivers/gpu/drm/i915/gt/intel_llc.c
> > index 14fe65812e42..2677d62573d9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_llc.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_llc.c
> > @@ -49,13 +49,28 @@ static unsigned int cpu_max_MHz(void)
> >  static bool get_ia_constants(struct intel_llc *llc,
> >  struct ia_constants *consts)
> >  {
> > +   struct intel_guc_slpc *slpc = &llc_to_gt(llc)->uc.guc.slpc;
> > struct drm_i915_private *i915 = llc_to_gt(llc)->i915;
> > struct intel_rps *rps = &llc_to_gt(llc)->rps;
> >
> > if (!HAS_LLC(i915) || IS_DGFX(i915))
> > return false;
> >
> > -   if (rps->max_freq <= rps->min_freq)
> > +   if (intel_uc_uses_guc_slpc(&llc_to_gt(llc)->uc)) {
> > +   consts->min_gpu_freq = slpc->min_freq;
> > +   consts->max_gpu_freq = slpc->rp0_freq;

Sorry, this also doesn't work because slpc freq are converted using
intel_gpu_freq(). How about calling gen6_rps_get_freq_caps() directly in
the SLPC case? Or we could go with the original patch for now with a FIXME?
Thanks.

> > +   } else {
> > +   consts->min_gpu_freq = rps->min_freq;
> > +   consts->max_gpu_freq = rps->max_freq;
> > +   }
> > +
> > +   if (GRAPHICS_VER(i915) >= 9) {
> > +   /* Convert GT frequency to 50 HZ units */
> > +   consts->min_gpu_freq /= GEN9_FREQ_SCALER;
> > +   consts->max_gpu_freq /= GEN9_FREQ_SCALER;
> > +   }


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Fix inconsistent locked return

2022-08-30 Thread Dixit, Ashutosh
On Tue, 30 Aug 2022 12:35:37 -0700, Rodrigo Vivi wrote:
>
> Fix for intel_guc_slpc_set_min_freq() warn:
> inconsistent returns '&slpc->lock'.
>
> v2: Avoid with_intel_runtime_pm with the
> internal goto/return. (Ashutosh)
> Also standardize the 'ret' if this came from
> the efficient setup. And avoid the 'unlikely'.

Reviewed-by: Ashutosh Dixit 

> Fixes: 95ccf312a1e4 ("drm/i915/guc/slpc: Allow SLPC to use efficient 
> frequency")
> Reported-by: kernel test robot 
> Reported-by: Dan Carpenter 
> Cc: Ashutosh Dixit 
> Signed-off-by: Rodrigo Vivi 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c | 40 ++---
>  1 file changed, 20 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index 9d49ccef03bb..fdd895f73f9f 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -467,33 +467,33 @@ int intel_guc_slpc_set_min_freq(struct intel_guc_slpc 
> *slpc, u32 val)
>
>   /* Need a lock now since waitboost can be modifying min as well */
>   mutex_lock(&slpc->lock);
> -
> - with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
> -
> - /* Ignore efficient freq if lower min freq is requested */
> - ret = slpc_set_param(slpc,
> -  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
> -  val < slpc->rp1_freq);
> - if (unlikely(ret)) {
> - i915_probe_error(i915, "Failed to toggle efficient freq 
> (%pe)\n",
> -  ERR_PTR(ret));
> - return ret;
> - }
> -
> - ret = slpc_set_param(slpc,
> -  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> -  val);
> -
> - /* Return standardized err code for sysfs calls */
> - if (ret)
> - ret = -EIO;
> + wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> +
> + /* Ignore efficient freq if lower min freq is requested */
> + ret = slpc_set_param(slpc,
> +  SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
> +  val < slpc->rp1_freq);
> + if (ret) {
> + i915_probe_error(i915, "Failed to toggle efficient freq 
> (%pe)\n",
> +  ERR_PTR(ret));
> + goto out;
>   }
>
> + ret = slpc_set_param(slpc,
> +  SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
> +  val);
> +
>   if (!ret)
>   slpc->min_freq_softlimit = val;
>
> +out:
> + intel_runtime_pm_put(&i915->runtime_pm, wakeref);
>   mutex_unlock(&slpc->lock);
>
> + /* Return standardized err code for sysfs calls */
> + if (ret)
> + ret = -EIO;
> +
>   return ret;
>  }
>
> --
> 2.37.2
>


Re: [Intel-gfx] [PATCH] drm/i915/slpc: Let's fix the PCODE min freq table setup for SLPC

2022-08-30 Thread Dixit, Ashutosh
On Tue, 30 Aug 2022 12:16:20 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> @@ -65,13 +63,8 @@ static bool get_ia_constants(struct intel_llc *llc,
>   /* convert DDR frequency from units of 266.6MHz to bandwidth */
>   consts->min_ring_freq = mult_frac(consts->min_ring_freq, 8, 3);
>
> - consts->min_gpu_freq = rps->min_freq;
> - consts->max_gpu_freq = rps->max_freq;
> - if (GRAPHICS_VER(i915) >= 9) {
> - /* Convert GT frequency to 50 HZ units */
> - consts->min_gpu_freq /= GEN9_FREQ_SCALER;
> - consts->max_gpu_freq /= GEN9_FREQ_SCALER;
> - }
> + consts->min_gpu_freq = intel_rps_get_min_raw_freq(rps);
> + consts->max_gpu_freq = intel_rps_get_max_raw_freq(rps);
>
>   return true;
>  }
> @@ -130,6 +123,12 @@ static void gen6_update_ring_freq(struct intel_llc *llc)
>   if (!get_ia_constants(llc, &consts))
>   return;
>
> + /*
> +  * Although this is unlikely on any platform during initialization,
> +  * let's ensure we don't get accidentally into infinite loop
> +  */
> + if (consts.max_gpu_freq <= consts.min_gpu_freq)
> + return;

As I said this is not correct and is not needed. If 'consts.max_gpu_freq ==
consts.min_gpu_freq' we would *want* to program PCODE. If
'consts.max_gpu_freq < consts.min_gpu_freq' the loop will automatically
skip (and also it is not an infinite loop).

> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
> b/drivers/gpu/drm/i915/gt/intel_rps.c
> index de794f5f8594..26af974292c7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -2156,6 +2156,24 @@ u32 intel_rps_get_max_frequency(struct intel_rps *rps)
>   return intel_gpu_freq(rps, rps->max_freq_softlimit);
>  }
>
> +u32 intel_rps_get_max_raw_freq(struct intel_rps *rps)

What does "raw" mean? Or are we introducing a new concept here then we need
to explain the new concept? I was previously told there is a concept of "hw
units" of freq and intel_gpu_freq will convert from "hw units" to MHz.

Also, Is the return value in units of 50 MHz in all cases (we know it is
for SLPC and Gen 9+)? In that case we should name such a function to
something like intel_rps_get_max_freq_in_50mhz_units?

> +{
> + struct intel_guc_slpc *slpc = rps_to_slpc(rps);
> + u32 freq;
> +
> + if (rps_uses_slpc(rps)) {
> + return DIV_ROUND_CLOSEST(slpc->rp0_freq,
> +  GT_FREQUENCY_MULTIPLIER);
> + } else {
> + freq = rps->max_freq;
> + if (GRAPHICS_VER(rps_to_i915(rps)) >= 9) {
> + /* Convert GT frequency to 50 HZ units */

50 MHz and not 50 Hz. Also the comment should be moved to above
rps_uses_slpc() line if returned freq is always in units of 50 MHz.

> + freq /= GEN9_FREQ_SCALER;
> + }
> + return freq;
> + }
> +}

Also is this function equivalent to this:

u32 intel_rps_get_max_freq_in_50mhz_units(struct intel_rps *rps)
{
struct intel_guc_slpc *slpc = rps_to_slpc(rps);
u32 freq;

/* freq in MHz */
freq = rps_uses_slpc(rps) ? slpc->rp0_freq : 
intel_gpu_freq(rps->max_freq);

return DIV_ROUND_CLOSEST(freq, GT_FREQUENCY_MULTIPLIER);
}

Sorry I don't have a lot of history in how these frequencies are scaled
specially for old platforms like CHV/VLV/Gen6+. But afaiu intel_gpu_freq()
will convert the freq to MHz for all platforms.

And then does get_ia_constants() accept freq in 50 MHz units for all
platforms?

If we are not sure about this, we can go with your version which is closer
to the original version in get_ia_constants() and so "safer" I guess.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] i915/pmu: Wire GuC backend to per-client busyness

2022-08-31 Thread Dixit, Ashutosh
On Fri, 26 Aug 2022 09:33:08 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

Just to communicate my thoughts I have posted this patch on top of your
patch:

[1] https://patchwork.freedesktop.org/series/107983/

Could you please take a look at that and see if it makes sense.

> On Thu, Aug 25, 2022 at 06:44:50PM -0700, Dixit, Ashutosh wrote:
> > On Thu, 04 Aug 2022 16:21:25 -0700, Umesh Nerlige Ramappa wrote:
> >
> > Hi Umesh, I am fairly new to this code so some questions will be below will
> > be newbie questions, thanks for bearing with me.
> >
> >> diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
> >> b/drivers/gpu/drm/i915/gt/intel_context.c
> >> index 654a092ed3d6..e2d70a9fdac0 100644
> >> --- a/drivers/gpu/drm/i915/gt/intel_context.c
> >> +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> >> @@ -576,16 +576,24 @@ void intel_context_bind_parent_child(struct 
> >> intel_context *parent,
> >>child->parallel.parent = parent;
> >>  }
> >>
> >> -u64 intel_context_get_total_runtime_ns(const struct intel_context *ce)
> >> +u64 intel_context_get_total_runtime_ns(struct intel_context *ce)
> >>  {
> >>u64 total, active;
> >>
> >> +  if (ce->ops->update_stats)
> >> +  ce->ops->update_stats(ce);
> >> +
> >>total = ce->stats.runtime.total;
> >>if (ce->ops->flags & COPS_RUNTIME_CYCLES)
> >>total *= ce->engine->gt->clock_period_ns;
> >>
> >>active = READ_ONCE(ce->stats.active);
> >> -  if (active)
> >> +  /*
> >> +   * When COPS_RUNTIME_ACTIVE_TOTAL is set for ce->cops, the backend
> >> +   * already provides the total active time of the context, so skip this
> >> +   * calculation when this flag is set.
> >> +   */
> >> +  if (active && !(ce->ops->flags & COPS_RUNTIME_ACTIVE_TOTAL))
> >>active = intel_context_clock() - active;
> >>
> >>return total + active;
> >
> > /snip/
> >
> >> @@ -1396,6 +1399,10 @@ static void guc_timestamp_ping(struct work_struct 
> >> *wrk)
> >>with_intel_runtime_pm(>->i915->runtime_pm, wakeref)
> >>__update_guc_busyness_stats(guc);
> >>
> >> +  /* adjust context stats for overflow */
> >> +  xa_for_each(&guc->context_lookup, index, ce)
> >> +  __guc_context_update_clks(ce);
> >
> > What is the reason for calling __guc_context_update_clks() periodically
> > from guc_timestamp_ping() since it appears we should just be able to call
> > __guc_context_update_clks() from intel_context_get_total_runtime_ns() to
> > update 'active'? Is the reason for calling __guc_context_update_clks()
> > periodically that the calculations in __guc_context_update_clks() become
> > invalid if the counters overflow?
>
> Correct, these are 32-bit counters and the worker just tracks overflow.

OK.

>
> >
> >> +
> >>intel_gt_reset_unlock(gt, srcu);
> >>
> >>mod_delayed_work(system_highpri_wq, &guc->timestamp.work,
> >> @@ -1469,6 +1476,56 @@ void intel_guc_busyness_unpark(struct intel_gt *gt)
> >> guc->timestamp.ping_delay);
> >>  }
> >>
> >> +static void __guc_context_update_clks(struct intel_context *ce)
> >> +{
> >> +  struct intel_guc *guc = ce_to_guc(ce);
> >> +  struct intel_gt *gt = ce->engine->gt;
> >> +  u32 *pphwsp, last_switch, engine_id;
> >> +  u64 start_gt_clk, active;
> >> +  unsigned long flags;
> >> +  ktime_t unused;
> >> +
> >> +  spin_lock_irqsave(&guc->timestamp.lock, flags);
> >> +
> >> +  /*
> >> +   * GPU updates ce->lrc_reg_state[CTX_TIMESTAMP] when context is switched
> >> +   * out, however GuC updates PPHWSP offsets below. Hence KMD (CPU)
> >> +   * relies on GuC and GPU for busyness calculations. Due to this, A
> >> +   * potential race was highlighted in an earlier review that can lead to
> >> +   * double accounting of busyness. While the solution to this is a wip,
> >> +   * busyness is still usable for platforms running GuC submission.
> >> +   */
> >> +  pphwsp = ((void *)ce->lrc_reg_state) - LRC_STATE_OFFSET;
> >> +  last_switch = READ_ONCE(pphwsp[PPHWSP_GUC_CONTEXT_USAGE_STAMP_LO]);
> >> +  engine_id = READ_ONCE(pphwsp[PPHWSP_GUC_CONTEXT_USAGE_ENGINE_ID]);
> >> +
> >> +  guc_update_pm_timestamp(guc, &a

Re: [Intel-gfx] [PATCH] drm/i915/slpc: Let's fix the PCODE min freq table setup for SLPC

2022-08-31 Thread Dixit, Ashutosh
On Wed, 31 Aug 2022 14:45:38 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> We need to inform PCODE of a desired ring frequencies so PCODE update
> the memory frequencies to us. rps->min_freq and rps->max_freq are the
> frequencies used in that request. However they were unset when SLPC was
> enabled and PCODE never updated the memory freq.
>
> v2 (as Suggested by Ashutosh): if SLPC is in use, let's pick the right
>frequencies from the get_ia_constants instead of the fake init of
>rps' min and max.
>
> v3: don't forget the max <= min return
>
> v4: Move all the freq conversion to intel_rps.c. And the max <= min
> check to where it belongs.
>
> v5: (Ashutosh) Fix old comment s/50 HZ/50 MHz and add a doc explaining
> the "raw format"

I think we both agree that mostly the way this patch is written it is to
add SLPC but not risk disturbing host turbo, specially old platforms
(CHV/VLV/ILK and pre-Gen 6). Also these freq units (sometimes 16.67 MHz
units, sometimes 50 MHz, sometime MHz) in different places in the driver
and different product generations is hugely confusing to say the least. For
old platform we don't really know what units the freq's are in, we only
know intel_gpu_freq will magically convert freq's to MHz. In any case let's
work with what we have.

> @@ -130,6 +123,12 @@ static void gen6_update_ring_freq(struct intel_llc *llc)
>   if (!get_ia_constants(llc, &consts))
>   return;
>
> + /*
> +  * Although this is unlikely on any platform during initialization,
> +  * let's ensure we don't get accidentally into infinite loop
> +  */
> + if (consts.max_gpu_freq <= consts.min_gpu_freq)
> + return;

As I said I would remove reference to "infinite loop", I am not seeing any
infinite loop, maybe just delete the comment.

Also as I said I see the check above should be completely removed (so it is
actually a pre-existing bug in the code). However since you want to carry
it forward in order not to risk disturbing legacy behavior that's fine.

Rest LGTM:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [RFC PATCH 2/2] Fix per client busyness locking

2022-09-01 Thread Dixit, Ashutosh
On Wed, 31 Aug 2022 15:45:49 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

I have updated my RFC patch based on your feedback so we can discuss again.

> On Wed, Aug 31, 2022 at 12:33:55PM -0700, Ashutosh Dixit wrote:
> > 1. Do all ce->stats updates and reads under guc->timestamp.lock
>
> Other than the question about READ_ONCE/WRITE_ONCE, I am not sure I
> understand what's broken in the locking logic here. Can you please add some
> description? thanks

Basically ce->stats.runtime.total and ce->stats.active are tied together so
should be accessed/updated atomically. Also just the update of
ce->stats.runtime.total (via lrc_update_runtime()) can have multiple
concurrent writers so even that has to be protected (since that update is
not atomic).

These was the reason for:
* Introducing lrc_update_runtime_locked
* Returning early from intel_context_get_total_runtime_ns otherwise we'll
  need to introduce the same locking there
* Enforcing locking in guc_context_update_stats (which was there in your
  original patch too)

(I think execlists code didn't have this issue because there
lrc_update_runtime is called from a tasklet (which iirc is like a single
thread/cpu). I am not sure how 'active' is updated in execlist mode so
there may or may not be a problem in intel_context_get_total_runtime_ns).

I have another question: what about that GPU vs. GuC race mentioned in the
comment? What is the sequence of events there between GPU updating the
timestamp in the context image vs. GuC setting engine_id to -1 (at least
what is the sequence in which GPU and GuC supposed to do these updates
though it may not matter to i915 due to scheduling delays)?

>
> > 2. Pin context image before reading
> > 3. Merge __guc_context_update_clks and guc_context_update_stats into a
> >   single function
> > 4. Call lrc_update_runtime() unconditionally in guc_context_update_stats
> > 5. Seems no need to update ce->stats.active with this approach
> >
> > Some of the above steps may not be correct or complete but the idea is to
> > discuss/improve the original patch.
> >
> > Cc: Umesh Nerlige Ramappa 
> > Signed-off-by: Ashutosh Dixit 
> > ---
> > drivers/gpu/drm/i915/gt/intel_context.c   |  2 +-
> > drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
> > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 41 ++-
> > 3 files changed, 24 insertions(+), 21 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
> > b/drivers/gpu/drm/i915/gt/intel_context.c
> > index e2d70a9fdac0..c004f676de27 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> > @@ -581,7 +581,7 @@ u64 intel_context_get_total_runtime_ns(struct 
> > intel_context *ce)
> > u64 total, active;
> >
> > if (ce->ops->update_stats)
> > -   ce->ops->update_stats(ce);
> > +   return ce->ops->update_stats(ce);
>
> This is an improvement that we can add and eventually, we can make this a
> GuC specific vfunc. Doing so may also remove the COPS_RUNTIME_ACTIVE_TOTAL
> option that I added to GuC specific context ops.

I went ahead and did this in v2 of the RFC patch.

> >
> > total = ce->stats.runtime.total;
> > if (ce->ops->flags & COPS_RUNTIME_CYCLES)
> > diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h 
> > b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > index f7ff4c7d81c7..699435bfff99 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_context_types.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
> > @@ -59,7 +59,7 @@ struct intel_context_ops {
> >
> > void (*sched_disable)(struct intel_context *ce);
> >
> > -   void (*update_stats)(struct intel_context *ce);
> > +   u64 (*update_stats)(struct intel_context *ce);
> >
> > void (*reset)(struct intel_context *ce);
> > void (*destroy)(struct kref *kref);
> > diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> > b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > index bee8cf10f749..40d0edaf2e0a 100644
> > --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> > @@ -1376,7 +1376,6 @@ static void __update_guc_busyness_stats(struct 
> > intel_guc *guc)
> > spin_unlock_irqrestore(&guc->timestamp.lock, flags);
> > }
> >
> > -static void __guc_context_update_clks(struct intel_context *ce);
> > static void guc_timestamp_ping(struct work_struct *wrk)
> > {
> > struct intel_guc *guc = container_of(wrk, typeof(*guc),
> > @@ -1401,7 +1400,8 @@ static void guc_timestamp_ping(struct work_struct 
> > *wrk)
> >
> > /* adjust context stats for overflow */
> > xa_for_each(&guc->context_lookup, index, ce)
> > -   __guc_context_update_clks(ce);
> > +   if (ce->ops->update_stats)
> > +   ce->ops->update_stats(ce);
>
> context pinning logic needs to be separated for this (ping) path vs. the
> query path - intel_context_get_total_runtime_ns().

Done in v2 of RFC patch.

> >
> 

Re: [Intel-gfx] [PATCH 2/2] drm/i915: Let's avoid even early init if SLPC is used.

2022-09-02 Thread Dixit, Ashutosh
On Fri, 02 Sep 2022 02:51:26 -0700, Rodrigo Vivi wrote:
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
> b/drivers/gpu/drm/i915/gt/intel_rps.c
> index 6fadde4ee7bf..c29652281f2e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -1960,6 +1960,9 @@ void gen5_rps_irq_handler(struct intel_rps *rps)
>
>  void intel_rps_init_early(struct intel_rps *rps)
>  {
> + if (rps_uses_slpc(rps))
> + return;
> +

Hi Rodrigo, Let me double check with you, this works correctly at this
place, correct? Looks like things to make this work should get initialized
in intel_uc_init_early() which is called just before intel_rps_init_early()
so looks ok (can't tell for sure with all those uc macros :/):

Reviewed-by: Ashutosh Dixit 

>   mutex_init(&rps->lock);
>   mutex_init(&rps->power.mutex);
>
> --
> 2.37.2
>


Re: [Intel-gfx] [PATCH] drm/i915/guc: Cancel GuC engine busyness worker synchronously

2022-09-05 Thread Dixit, Ashutosh
On Fri, 26 Aug 2022 17:21:35 -0700, Umesh Nerlige Ramappa wrote:
>
> The worker is canceled in gt_park path, but earlier it was assumed that
> gt_park path cannot sleep and the cancel is asynchronous. This caused a
> race with suspend flow where the worker runs after suspend and causes an
> unclaimed register access warning. Cancel the worker synchronously since
> the gt_park is indeed allowed to sleep.

Indeed, __gt_park already calls cancel_work_sync and synchronize_irq which
can sleep:

Reviewed-by: Ashutosh Dixit 

> v2: Fix author name and sign-off mismatch
>
> Signed-off-by: Umesh Nerlige Ramappa 
> Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/4419
> Fixes: 77cdd054dd2c ("drm/i915/pmu: Connect engine busyness stats from GuC to 
> pmu")
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> index 0d56b615bf78..e6275380b253 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
> @@ -1438,7 +1438,12 @@ void intel_guc_busyness_park(struct intel_gt *gt)
>   if (!guc_submission_initialized(guc))
>   return;
>
> - cancel_delayed_work(&guc->timestamp.work);
> + /*
> +  * There is a race with suspend flow where the worker runs after suspend
> +  * and causes an unclaimed register access warning. Cancel the worker
> +  * synchronously here.
> +  */
> + cancel_delayed_work_sync(&guc->timestamp.work);
>
>   /*
>* Before parking, we should sample engine busyness stats if we need to.
> --
> 2.25.1
>


Re: [Intel-gfx] [PATCH 1/6] drm/i915: Prepare more multi-GT initialization

2022-09-06 Thread Dixit, Ashutosh
On Tue, 06 Sep 2022 07:07:41 -0700, Rodrigo Vivi wrote:
>

Copying author, these patches are from a different series
(https://patchwork.freedesktop.org/series/107908/) as mentioned in the
cover letter.

> On Fri, Sep 02, 2022 at 04:52:57PM -0700, Ashutosh Dixit wrote:
> > From: Matt Roper 
> >
> > We're going to introduce an additional intel_gt for MTL's media unit
> > soon.  Let's provide a bit more multi-GT initialization framework in
> > preparation for that.  The initialization will pull the list of GTs for
> > a platform from the device info structure.  Although necessary for the
> > immediate MTL media enabling, this same framework will also be used
> > farther down the road when we enable remote tiles on xehpsdv and pvc.
> >
> > v2:
> >  - Re-add missing test for !HAS_EXTRA_GT_LIST in intel_gt_probe_all().
> >
> > Cc: Aravind Iddamsetty 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_engine_cs.c |  2 +-
> >  drivers/gpu/drm/i915/gt/intel_gt.c| 54 ---
> >  drivers/gpu/drm/i915/gt/intel_gt.h|  1 -
> >  drivers/gpu/drm/i915/gt/intel_gt_types.h  |  3 ++
> >  drivers/gpu/drm/i915/i915_drv.h   |  2 +
> >  drivers/gpu/drm/i915/intel_device_info.h  | 16 ++
> >  .../gpu/drm/i915/selftests/mock_gem_device.c  |  1 +
> >  7 files changed, 70 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
> > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > index 275ad72940c1..41acc285e8bf 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> > @@ -736,7 +736,7 @@ static intel_engine_mask_t init_engine_mask(struct 
> > intel_gt *gt)
> > u16 vdbox_mask;
> > u16 vebox_mask;
> >
> > -   info->engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
> > +   GEM_BUG_ON(!info->engine_mask);
> >
> > if (GRAPHICS_VER(i915) < 11)
> > return info->engine_mask;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index e4bac2431e41..5b4263c708cc 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -815,20 +815,16 @@ static void
> >  intel_gt_tile_cleanup(struct intel_gt *gt)
> >  {
> > intel_uncore_cleanup_mmio(gt->uncore);
> > -
> > -   if (!gt_is_root(gt)) {
> > -   kfree(gt->uncore->debug);
> > -   kfree(gt->uncore);
> > -   kfree(gt);
> > -   }
>
> In this patch I see less free...
>
> >  }
> >
> >  int intel_gt_probe_all(struct drm_i915_private *i915)
> >  {
> > struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
> > struct intel_gt *gt = &i915->gt0;
> > +   const struct intel_gt_definition *gtdef;
> > phys_addr_t phys_addr;
> > unsigned int mmio_bar;
> > +   unsigned int i;
> > int ret;
> >
> > mmio_bar = GRAPHICS_VER(i915) == 2 ? GEN2_GTTMMADR_BAR : GTTMMADR_BAR;
> > @@ -839,14 +835,58 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
> >  * and it has been already initialized early during probe
> >  * in i915_driver_probe()
> >  */
> > +   gt->i915 = i915;
> > +   gt->name = "Primary GT";
> > +   gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
> > +
> > +   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
> > ret = intel_gt_tile_setup(gt, phys_addr);
> > if (ret)
> > return ret;
> >
> > i915->gt[0] = gt;
> >
> > -   /* TODO: add more tiles */
> > +   if (!HAS_EXTRA_GT_LIST(i915))
> > +   return 0;
> > +
> > +   for (i = 1, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1];
> > +gtdef->setup != NULL;
> > +i++, gtdef = &INTEL_INFO(i915)->extra_gt_list[i - 1]) {
> > +   gt = drmm_kzalloc(&i915->drm, sizeof(*gt), GFP_KERNEL);
>
> ... and more allocs...
>
> it probably deserves some smaller patches with some explanations?
>
> or something is indeed missing here?
>
> > +   if (!gt) {
> > +   ret = -ENOMEM;
> > +   goto err;
> > +   }
> > +
> > +   gt->i915 = i915;
> > +   gt->name = gtdef->name;
> > +   gt->type = gtdef->type;
> > +   gt->info.engine_mask = gtdef->engine_mask;
> > +   gt->info.id = i;
> > +
> > +   drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
> > +   if (GEM_WARN_ON(range_overflows_t(resource_size_t,
> > + gtdef->mapping_base,
> > + SZ_16M,
> > + pci_resource_len(pdev, 
> > mmio_bar {
> > +   ret = -ENODEV;
> > +   goto err;
> > +   }
> > +
> > +   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
> > +   if (ret)
> > +   goto err;
> > +
> > +   i915->gt[i] = gt;
> > +   }
> > +
> > return 0;
> > +
> > +err:
> > +   i915_probe_error(i915, "F

Re: [Intel-gfx] [PATCH 3/6] drm/i915/xelpmp: Expose media as another GT

2022-09-06 Thread Dixit, Ashutosh
On Mon, 05 Sep 2022 02:11:16 -0700, Jani Nikula wrote:
>

Copying author, these patches are from a different series
(https://patchwork.freedesktop.org/series/107908/) as mentioned in the
cover letter.

> On Fri, 02 Sep 2022, Ashutosh Dixit  wrote:
> > From: Matt Roper 
> >
> > Xe_LPM+ platforms have "standalone media."  I.e., the media unit is
> > designed as an additional GT with its own engine list, GuC, forcewake,
> > etc.  Let's allow platforms to include media GTs in their device info.
> >
> > Cc: Aravind Iddamsetty 
> > Signed-off-by: Matt Roper 
> > ---
> >  drivers/gpu/drm/i915/Makefile|  1 +
> >  drivers/gpu/drm/i915/gt/intel_gt.c   | 12 ++--
> >  drivers/gpu/drm/i915/gt/intel_gt_regs.h  |  8 +
> >  drivers/gpu/drm/i915/gt/intel_sa_media.c | 39 
> >  drivers/gpu/drm/i915/gt/intel_sa_media.h | 15 +
> >  drivers/gpu/drm/i915/i915_pci.c  | 15 +
> >  drivers/gpu/drm/i915/intel_device_info.h |  5 ++-
> >  drivers/gpu/drm/i915/intel_uncore.c  | 16 --
> >  drivers/gpu/drm/i915/intel_uncore.h  | 20 ++--
> >  9 files changed, 123 insertions(+), 8 deletions(-)
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.c
> >  create mode 100644 drivers/gpu/drm/i915/gt/intel_sa_media.h
> >
> > diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> > index 522ef9b4aff3..e83e4cd46968 100644
> > --- a/drivers/gpu/drm/i915/Makefile
> > +++ b/drivers/gpu/drm/i915/Makefile
> > @@ -123,6 +123,7 @@ gt-y += \
> > gt/intel_ring.o \
> > gt/intel_ring_submission.o \
> > gt/intel_rps.o \
> > +   gt/intel_sa_media.o \
> > gt/intel_sseu.o \
> > gt/intel_sseu_debugfs.o \
> > gt/intel_timeline.o \
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt.c
> > index 57a6488c0e14..bfe77d01f747 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> > @@ -776,10 +776,15 @@ void intel_gt_driver_late_release_all(struct 
> > drm_i915_private *i915)
> > }
> >  }
> >
> > -static int intel_gt_tile_setup(struct intel_gt *gt, phys_addr_t phys_addr)
> > +static int intel_gt_tile_setup(struct intel_gt *gt,
> > +  phys_addr_t phys_addr,
> > +  u32 gsi_offset)
> >  {
> > int ret;
> >
> > +   /* GSI offset is only applicable for media GTs */
> > +   drm_WARN_ON(>->i915->drm, gsi_offset);
> > +
> > if (!gt_is_root(gt)) {
> > struct intel_uncore_mmio_debug *mmio_debug;
> > struct intel_uncore *uncore;
> > @@ -840,7 +845,7 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
> > gt->info.engine_mask = RUNTIME_INFO(i915)->platform_engine_mask;
> >
> > drm_dbg(&i915->drm, "Setting up %s\n", gt->name);
> > -   ret = intel_gt_tile_setup(gt, phys_addr);
> > +   ret = intel_gt_tile_setup(gt, phys_addr, 0);
> > if (ret)
> > return ret;
> >
> > @@ -873,7 +878,8 @@ int intel_gt_probe_all(struct drm_i915_private *i915)
> > goto err;
> > }
> >
> > -   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base);
> > +   ret = gtdef->setup(gt, phys_addr + gtdef->mapping_base,
> > +  gtdef->gsi_offset);
> > if (ret)
> > goto err;
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > index d414785003cc..fb2c56777480 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > @@ -1578,4 +1578,12 @@
> >
> >  #define GEN12_SFC_DONE(n)  _MMIO(0x1cc000 + (n) * 0x1000)
> >
> > +/*
> > + * Standalone Media's non-engine GT registers are located at their regular 
> > GT
> > + * offsets plus 0x38.  This extra offset is stored inside the 
> > intel_uncore
> > + * structure so that the existing code can be used for both GTs without
> > + * modification.
> > + */
> > +#define MTL_MEDIA_GSI_BASE 0x38
> > +
> >  #endif /* __INTEL_GT_REGS__ */
> > diff --git a/drivers/gpu/drm/i915/gt/intel_sa_media.c 
> > b/drivers/gpu/drm/i915/gt/intel_sa_media.c
> > new file mode 100644
> > index ..8c5c519457cc
> > --- /dev/null
> > +++ b/drivers/gpu/drm/i915/gt/intel_sa_media.c
> > @@ -0,0 +1,39 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2021 Intel Corporation
> > + */
> > +
> > +#include 
> > +
> > +#include "i915_drv.h"
> > +#include "gt/intel_gt.h"
> > +#include "gt/intel_sa_media.h"
> > +
> > +int intel_sa_mediagt_setup(struct intel_gt *gt, phys_addr_t phys_addr,
> > +  u32 gsi_offset)
> > +{
> > +   struct drm_i915_private *i915 = gt->i915;
> > +   struct intel_uncore *uncore;
> > +
> > +   uncore = drmm_kzalloc(&i915->drm, sizeof(*uncore), GFP_KERNEL);
> > +   if (!uncore)
> > +   return -ENOMEM;
> > +
> > +   uncore->gsi_offset = gsi_offset

Re: [Intel-gfx] [PATCH] drm/i915: Let's avoid even early init if SLPC is used.

2022-09-06 Thread Dixit, Ashutosh
On Tue, 06 Sep 2022 13:33:49 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

>  void intel_rps_init_early(struct intel_rps *rps)
>  {
> + if (rps_wants_slpc(rps))
> + return;
> +

So what happens if we "want" SLPC but finally could not enable it (switch
slpc to "in use") and have to fall back to host turbo? If that's a valid
possibility then we can't do the above.

From drivers/gpu/drm/i915/gt/uc/intel_uc.h

 * - Not supported: not available in HW and/or firmware not defined.
 * - Supported: available in HW and firmware defined.
 * - Wanted: supported + enabled in modparam.
 * - In use: wanted + firmware found on the system and successfully fetched.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [RFC PATCH 2/2] Fix per client busyness locking

2022-09-07 Thread Dixit, Ashutosh
On Tue, 06 Sep 2022 11:29:15 -0700, Umesh Nerlige Ramappa wrote:
>
> On Thu, Sep 01, 2022 at 04:55:22PM -0700, Dixit, Ashutosh wrote:
> > On Wed, 31 Aug 2022 15:45:49 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >
> > Hi Umesh,
> >
> > I have updated my RFC patch based on your feedback so we can discuss again.
> >
> >> On Wed, Aug 31, 2022 at 12:33:55PM -0700, Ashutosh Dixit wrote:
> >> > 1. Do all ce->stats updates and reads under guc->timestamp.lock
> >>
> >> Other than the question about READ_ONCE/WRITE_ONCE, I am not sure I
> >> understand what's broken in the locking logic here. Can you please add some
> >> description? thanks
> >
> > Basically ce->stats.runtime.total and ce->stats.active are tied together so
> > should be accessed/updated atomically. Also just the update of
> > ce->stats.runtime.total (via lrc_update_runtime()) can have multiple
> > concurrent writers so even that has to be protected (since that update is
> > not atomic).
> >
> > These was the reason for:
> > * Introducing lrc_update_runtime_locked
> > * Returning early from intel_context_get_total_runtime_ns otherwise we'll
> >  need to introduce the same locking there
> > * Enforcing locking in guc_context_update_stats (which was there in your
> >  original patch too)
> >
> > (I think execlists code didn't have this issue because there
> > lrc_update_runtime is called from a tasklet (which iirc is like a single
> > thread/cpu). I am not sure how 'active' is updated in execlist mode so
> > there may or may not be a problem in intel_context_get_total_runtime_ns).
> >
> > I have another question: what about that GPU vs. GuC race mentioned in the
> > comment? What is the sequence of events there between GPU updating the
> > timestamp in the context image vs. GuC setting engine_id to -1 (at least
> > what is the sequence in which GPU and GuC supposed to do these updates
> > though it may not matter to i915 due to scheduling delays)?
>
> With GPU, KMD and GuC running concurrently, after a context is done, this
> is the sequence.
>
> GPU) updates context image (total_runtime)
> KMD) reads total_runtime.
> KMD) sees engine_id is valid. KMD uses start_time from pphwsp to calculate
> active_time.
> KMD) returns total_runtime + active_time (double accounted busyness!!)
> GuC) gets ctxt switchout event and sets engine_id and start_time to ~0

Thanks for the explanation, so yes I understand the double accounting now.

>
> This is not rare when running the IGT tests, so the total runtime is
> updated in the query only if engine_id is idle. continued below ...
>
> >
> >>
> >> > 2. Pin context image before reading
> >> > 3. Merge __guc_context_update_clks and guc_context_update_stats into a
> >> >   single function
> >> > 4. Call lrc_update_runtime() unconditionally in guc_context_update_stats
> >> > 5. Seems no need to update ce->stats.active with this approach
> >> >
> >> > Some of the above steps may not be correct or complete but the idea is to
> >> > discuss/improve the original patch.
> >> >
> >> > Cc: Umesh Nerlige Ramappa 
> >> > Signed-off-by: Ashutosh Dixit 
> >> > ---
> >> > drivers/gpu/drm/i915/gt/intel_context.c   |  2 +-
> >> > drivers/gpu/drm/i915/gt/intel_context_types.h |  2 +-
> >> > .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 41 ++-
> >> > 3 files changed, 24 insertions(+), 21 deletions(-)
> >> >
> >> > diff --git a/drivers/gpu/drm/i915/gt/intel_context.c 
> >> > b/drivers/gpu/drm/i915/gt/intel_context.c
> >> > index e2d70a9fdac0..c004f676de27 100644
> >> > --- a/drivers/gpu/drm/i915/gt/intel_context.c
> >> > +++ b/drivers/gpu/drm/i915/gt/intel_context.c
> >> > @@ -581,7 +581,7 @@ u64 intel_context_get_total_runtime_ns(struct 
> >> > intel_context *ce)
> >> >  u64 total, active;
> >> >
> >> >  if (ce->ops->update_stats)
> >> > -ce->ops->update_stats(ce);
> >> > +return ce->ops->update_stats(ce);
> >>
> >> This is an improvement that we can add and eventually, we can make this a
> >> GuC specific vfunc. Doing so may also remove the COPS_RUNTIME_ACTIVE_TOTAL
> >> option that I added to GuC specific context ops.
> >
> > I went ahead and did this in v2 of the RFC patch.
> >
> >> >
> >> >  tot

Re: [Intel-gfx] [PATCH 4/6] drm/i915/debugfs: Add perf_limit_reasons in debugfs

2022-09-07 Thread Dixit, Ashutosh
On Tue, 06 Sep 2022 07:13:03 -0700, Rodrigo Vivi wrote:
>

Copying author.

> On Fri, Sep 02, 2022 at 04:53:00PM -0700, Ashutosh Dixit wrote:
> > From: Tilak Tangudu 
> >
> > Add perf_limit_reasons in debugfs. Unlike the lower 16 perf_limit_reasons
> > status bits, the upper 16 log bits remain set until cleared, thereby
> > ensuring the throttling occurrence is not missed. The clear fop clears
> > the upper 16 log bits, the get fop gets all 32 log and status bits.
> >
> > Signed-off-by: Tilak Tangudu 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++
> >  drivers/gpu/drm/i915/i915_reg.h   |  1 +
> >  2 files changed, 28 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > index 108b9e76c32e..5c95cba5e5df 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > @@ -655,6 +655,32 @@ static bool rps_eval(void *data)
> >
> >  DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
> >
> > +static int perf_limit_reasons_get(void *data, u64 *val)
> > +{
> > +   struct intel_gt *gt = data;
> > +   intel_wakeref_t wakeref;
> > +
> > +   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > +   *val = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS);
> > +
> > +   return 0;
> > +}
> > +
> > +static int perf_limit_reasons_clear(void *data, u64 val)
> > +{
> > +   struct intel_gt *gt = data;
> > +   intel_wakeref_t wakeref;
> > +
> > +   /* Clear the upper 16 log bits, the lower 16 status bits are read-only 
> > */
> > +   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > +   intel_uncore_rmw(gt->uncore, GT0_PERF_LIMIT_REASONS,
> > +GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
> > +
> > +   return 0;
> > +}
> > +DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
> > +   perf_limit_reasons_clear, "%llu\n");
> > +
> >  void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
> >  {
> > static const struct intel_gt_debugfs_file files[] = {
> > @@ -664,6 +690,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, 
> > struct dentry *root)
> > { "forcewake_user", &forcewake_user_fops, NULL},
> > { "llc", &llc_fops, llc_eval },
> > { "rps_boost", &rps_boost_fops, rps_eval },
> > +   { "perf_limit_reasons", &perf_limit_reasons_fops, NULL },
> > };
> >
> > intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 5e6239864c35..10126995e1f6 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -1802,6 +1802,7 @@
> >  #define   POWER_LIMIT_4_MASK   REG_BIT(9)
> >  #define   POWER_LIMIT_1_MASK   REG_BIT(11)
> >  #define   POWER_LIMIT_2_MASK   REG_BIT(12)
> > +#define   GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16)
>
> Is this valid for all platforms?
> What does the bits are really telling us?
> Could we expand the reasons? The previous bits we know exactly
> what kind of limits we are dealing of, but with this combined
> one without any explanation I'm afraid this will bring more
> confusion than help. We will get bugged by many folks trying
> to debug this out there when bit 13, for instance, is set.
> "What does bit 13 mean?" will be a recurrent question with
> only a tribal knowledge kind of answer.
>
> >
> >  #define CHV_CLK_CTL1   _MMIO(0x101100)
> >  #define VLV_CLK_CTL2   _MMIO(0x101104)
> > --
> > 2.34.1
> >


Re: [Intel-gfx] [RFC PATCH 2/2] Fix per client busyness locking

2022-09-07 Thread Dixit, Ashutosh
On Wed, 07 Sep 2022 00:28:48 -0700, Tvrtko Ursulin wrote:
>
> On 06/09/2022 19:29, Umesh Nerlige Ramappa wrote:
> > On Thu, Sep 01, 2022 at 04:55:22PM -0700, Dixit, Ashutosh wrote:
> >> On Wed, 31 Aug 2022 15:45:49 -0700, Umesh Nerlige Ramappa wrote:
>
> [snip]
>
> >>> >
> >>> >    intel_gt_reset_unlock(gt, srcu);
> >>> >
> >>> > @@ -1476,17 +1476,21 @@ void intel_guc_busyness_unpark(struct
> >>> intel_gt *gt)
> >>> > guc->timestamp.ping_delay);
> >>> > }
> >>> >
> >>> > -static void __guc_context_update_clks(struct intel_context *ce)
> >>> > +static u64 guc_context_update_stats(struct intel_context *ce)
> >>> > {
> >>> >    struct intel_guc *guc = ce_to_guc(ce);
> >>> >    struct intel_gt *gt = ce->engine->gt;
> >>> >    u32 *pphwsp, last_switch, engine_id;
> >>> > -    u64 start_gt_clk, active;
> >>> >    unsigned long flags;
> >>> > +    u64 total, active = 0;
> >>> >    ktime_t unused;
> >>> >
> >>> > +    intel_context_pin(ce);
> >>>
> >>> intel_context_pin can sleep and we are not allowed to sleep in this
> >>> path -
> >>> intel_context_get_total_runtime_ns(), however we can sleep in the ping
> >>> worker path, so ideally we want to separate it out for the 2 paths.
> >>
> >> Do we know which intel_context_get_total_runtime_ns() call is not allowed
> >> to sleep? E.g. the code path from i915_drm_client_fdinfo() is allowed to
> >> sleep. As mentioned I have done this is v2 of RFC patch which I think is
> >> sufficient, but a more complicated scheme (which I think we can avoid for
> >> now) would be to pin in code paths when sleeping is allowed.
> >>
> >
> > Hmm, maybe intel_context_get_total_runtime_ns can sleep, not sure why I
> > was assuming that this falls in the perf_pmu path. This is now in the
> > drm_fdinfo query path. + Tvrtko.
> >
> > @Tvrtko, any idea if intel_context_get_total_runtime_ns path can sleep?
>
> Not at the moment - it calls it from a lockless (rcu) section when walking
> all the contexts belonging to a client. Idea being performance queries
> should have minimum effect on a running system.

Hmm my bad, missing the rcu and assuming a userspace thread will be able to
sleep.

> I think it would be possible to change in theory but not sure how much
> work. There is a hard need to sleep in there or what?

GuC contexts need to be pinned/unpinned which can sleep but investigating
if we can return a previously computed busyness when we cannot pin/sleep.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 6/6] drm/i915/rps: Freq caps for MTL

2022-09-07 Thread Dixit, Ashutosh
On Mon, 05 Sep 2022 02:40:08 -0700, Jani Nikula wrote:
> On Fri, 02 Sep 2022, Ashutosh Dixit  wrote:
> > For MTL, when reading from HW, RP0, RP1 (actuall RPe) and RPn freq use an
> > entirely different set of registers with different fields, bitwidths and
> > units.
> >
> > Cc: Badal Nilawar 
> > Signed-off-by: Ashutosh Dixit 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_rps.c | 20 
> >  drivers/gpu/drm/i915/i915_reg.h |  9 +
> >  2 files changed, 29 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
> > b/drivers/gpu/drm/i915/gt/intel_rps.c
> > index 579ae9ac089c..e7ab172698e3 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> > @@ -1085,6 +1085,23 @@ static u32 intel_rps_read_state_cap(struct intel_rps 
> > *rps)
> > return intel_uncore_read(uncore, GEN6_RP_STATE_CAP);
> >  }
> >
> > +static void
> > +mtl_get_freq_caps(struct intel_rps *rps, struct intel_rps_freq_caps *caps)
> > +{
> > +   struct intel_uncore *uncore = rps_to_uncore(rps);
> > +   u32 rp_state_cap = rps_to_gt(rps)->type == GT_MEDIA ?
> > +   intel_uncore_read(uncore, MTL_MEDIAP_STATE_CAP) 
> > :
> > +   intel_uncore_read(uncore, MTL_RP_STATE_CAP);
> > +   u32 rpe = rps_to_gt(rps)->type == GT_MEDIA ?
> > +   intel_uncore_read(uncore, MTL_MPE_FREQUENCY) :
> > +   intel_uncore_read(uncore, MTL_GT_RPE_FREQUENCY);
> > +
> > +   /* MTL values are in units of 16.67 MHz */
> > +   caps->rp0_freq = REG_FIELD_GET(MTL_RP0_CAP_MASK, rp_state_cap);
> > +   caps->min_freq = REG_FIELD_GET(MTL_RPN_CAP_MASK, rp_state_cap);
> > +   caps->rp1_freq = REG_FIELD_GET(MTL_RPE_MASK, rpe);
> > +}
> > +
> >  /**
> >   * gen6_rps_get_freq_caps - Get freq caps exposed by HW
> >   * @rps: the intel_rps structure
> > @@ -1098,6 +1115,9 @@ void gen6_rps_get_freq_caps(struct intel_rps *rps, 
> > struct intel_rps_freq_caps *c
> > struct drm_i915_private *i915 = rps_to_i915(rps);
> > u32 rp_state_cap;
> >
> > +   if (IS_METEORLAKE(i915))
> > +   return mtl_get_freq_caps(rps, caps);
> > +
>
> Please make gen6_rps_get_freq_caps() static, and add
>
> intel_rps_get_freq_caps()
> {
>   if (IS_METEORLAKE(i915))
>   return mtl_get_freq_caps(rps, caps);
>   else
>   return gen6_rps_get_freq_caps(rps, caps);
> }
>
> Or something.

A general name like intel_rps_get_freq_caps name does not sit well with the
current code. intel_rps_get_freq_caps was actually used in earlier versions
of the patch:

https://patchwork.freedesktop.org/patch/479179/?series=101606&rev=3

but was later changed to gen6_rps_get_freq_caps based on review
comments. Afaiu in i915 a name such as gen6_rps_get_freq_caps implies "Gen6
and later" and the gen6_rps_get_freq_caps name has actually proved quite
useful in reminding people that there are earlier/other generations not
covered by the function. See intel_rps_init.

Further the call stack is:

intel_rps_init -> gen6_rps_init -> gen6_rps_get_freq_caps

So it would look odd if we called intel_rps_get_freq_caps from
gen6_rps_init.

Therefore what I have done in v2 is:

s/gen6_rps_get_freq_caps/__gen6_rps_get_freq_caps/

and then

gen6_rps_get_freq_caps()
{
if (IS_METEORLAKE(i915))
return mtl_get_freq_caps(rps, caps);
else
return __gen6_rps_get_freq_caps(rps, caps);
}

Thanks.
--
Ashutosh

> > rp_state_cap = intel_rps_read_state_cap(rps);
> >
> > /* static values from HW: RP0 > RP1 > RPn (min_freq) */
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 06d555321651..d78f9675aa57 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -1792,6 +1792,15 @@
> >  #define XEHPSDV_RP_STATE_CAP   _MMIO(0x250014)
> >  #define PVC_RP_STATE_CAP   _MMIO(0x281014)
> >
> > +#define MTL_RP_STATE_CAP   _MMIO(0x138000)
> > +#define MTL_MEDIAP_STATE_CAP   _MMIO(0x138020)
> > +#define   MTL_RP0_CAP_MASK REG_GENMASK(8, 0)
> > +#define   MTL_RPN_CAP_MASK REG_GENMASK(24, 16)
> > +
> > +#define MTL_GT_RPE_FREQUENCY   _MMIO(0x13800c)
> > +#define MTL_MPE_FREQUENCY  _MMIO(0x13802c)
> > +#define   MTL_RPE_MASK REG_GENMASK(8, 0)
> > +
> >  #define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8)
> >  #define   GT0_PERF_LIMIT_REASONS_MASK  0xde3
> >  #define   PROCHOT_MASK REG_BIT(1)
>
> --
> Jani Nikula, Intel Open Source Graphics Center


Re: [Intel-gfx] [PATCH 5/6] drm/i915/mtl: PERF_LIMIT_REASONS changes for MTL

2022-09-07 Thread Dixit, Ashutosh
On Mon, 05 Sep 2022 02:30:45 -0700, Jani Nikula wrote:
>
> On Fri, 02 Sep 2022, Ashutosh Dixit  wrote:
> > PERF_LIMIT_REASONS register for MTL media gt is different now.
> >
> > Cc: Badal Nilawar 
> > Signed-off-by: Ashutosh Dixit 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt.h| 8 
> >  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 4 ++--
> >  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   | 6 +++---
> >  drivers/gpu/drm/i915/i915_reg.h   | 1 +
> >  4 files changed, 14 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt.h
> > index c9a359f35d0f..7286d47113ee 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt.h
> > @@ -9,6 +9,7 @@
> >  #include "intel_engine_types.h"
> >  #include "intel_gt_types.h"
> >  #include "intel_reset.h"
> > +#include "i915_reg.h"
> >
> >  struct drm_i915_private;
> >  struct drm_printer;
> > @@ -86,6 +87,13 @@ static inline bool intel_gt_is_wedged(const struct 
> > intel_gt *gt)
> > return unlikely(test_bit(I915_WEDGED, >->reset.flags));
> >  }
> >
> > +static inline
> > +i915_reg_t intel_gt_perf_limit_reasons_reg(struct intel_gt *gt)
> > +{
> > +   return gt->type == GT_MEDIA ?
> > +   MTL_MEDIA_PERF_LIMIT_REASONS : GT0_PERF_LIMIT_REASONS;
> > +}
>
> Nowadays, I pretty much think of everything from the standpoint of
> setting the example for future changes. Is this what we want people to
> copy? Because that's what we do, look for examples for what we want to
> achieve, and emulate.
>
> Do we want this to be duplicated for other registers? Choose register
> offset based on platform/engine/fusing/whatever parameter? Is this a
> register definition that should be in a _regs.h file?
>
> I don't know.

MTL_MEDIA_PERF_LIMIT_REASONS is an actual register so I'd think it needs to
be in a _regs.h file. And here we need to choose the register offset at
runtime based on the gt. So I don't see any way round what's happening
above unless you have other suggestions.

> I've also grown to dislike static inlines a lot, and this one's the
> worst because it actually can't be static inline because its passed as a
> function pointer.

Based on your feedback I've eliminated the static inline and moved the
function definition to a .c in v2 (though gcc allows taking addresses of
static inline's in .h files).

Thanks.
--
Ashutosh

>
>
> BR,
> Jani.
>
>
>
> > +
> >  int intel_gt_probe_all(struct drm_i915_private *i915);
> >  int intel_gt_tiles_init(struct drm_i915_private *i915);
> >  void intel_gt_release_all(struct drm_i915_private *i915);
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > index 5c95cba5e5df..fe0091f953c1 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > @@ -661,7 +661,7 @@ static int perf_limit_reasons_get(void *data, u64 *val)
> > intel_wakeref_t wakeref;
> >
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > -   *val = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS);
> > +   *val = intel_uncore_read(gt->uncore, 
> > intel_gt_perf_limit_reasons_reg(gt));
> >
> > return 0;
> >  }
> > @@ -673,7 +673,7 @@ static int perf_limit_reasons_clear(void *data, u64 val)
> >
> > /* Clear the upper 16 log bits, the lower 16 status bits are read-only 
> > */
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > -   intel_uncore_rmw(gt->uncore, GT0_PERF_LIMIT_REASONS,
> > +   intel_uncore_rmw(gt->uncore, 
> > intel_gt_perf_limit_reasons_reg(gt),
> >  GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
> >
> > return 0;
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> > index e066cc33d9f2..54deae45d81f 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c
> > @@ -510,7 +510,7 @@ struct intel_gt_bool_throttle_attr {
> > struct attribute attr;
> > ssize_t (*show)(struct device *dev, struct device_attribute *attr,
> > char *buf);
> > -   i915_reg_t reg32;
> > +   i915_reg_t (*reg32)(struct intel_gt *gt);
> > u32 mask;
> >  };
> >
> > @@ -521,7 +521,7 @@ static ssize_t throttle_reason_bool_show(struct device 
> > *dev,
> > struct intel_gt *gt = intel_gt_sysfs_get_drvdata(dev, attr->attr.name);
> > struct intel_gt_bool_throttle_attr *t_attr =
> > (struct intel_gt_bool_throttle_attr *) attr;
> > -   bool val = rps_read_mask_mmio(>->rps, t_attr->reg32, t_attr->mask);
> > +   bool val = rps_read_mask_mmio(>->rps, t_attr->reg32(gt), 
> > t_attr->mask);
> >
> > return sysfs_emit(buff, "%u\n", val);
> >  }
> > @@ -530,7 +530,7 @@ static ssize_t throttle_reason_bool_show(struct device 
> > *dev,
> >  struct intel_gt_bool_throttle_attr a

Re: [Intel-gfx] [PATCH 4/6] drm/i915/debugfs: Add perf_limit_reasons in debugfs

2022-09-07 Thread Dixit, Ashutosh
On Tue, 06 Sep 2022 07:13:03 -0700, Rodrigo Vivi wrote:
>

Hi Rodrigo,

> On Fri, Sep 02, 2022 at 04:53:00PM -0700, Ashutosh Dixit wrote:
> > From: Tilak Tangudu 
> >
> > Add perf_limit_reasons in debugfs. Unlike the lower 16 perf_limit_reasons
> > status bits, the upper 16 log bits remain set until cleared, thereby
> > ensuring the throttling occurrence is not missed. The clear fop clears
> > the upper 16 log bits, the get fop gets all 32 log and status bits.
> >
> > Signed-off-by: Tilak Tangudu 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 27 +++
> >  drivers/gpu/drm/i915/i915_reg.h   |  1 +
> >  2 files changed, 28 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > index 108b9e76c32e..5c95cba5e5df 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > @@ -655,6 +655,32 @@ static bool rps_eval(void *data)
> >
> >  DEFINE_INTEL_GT_DEBUGFS_ATTRIBUTE(rps_boost);
> >
> > +static int perf_limit_reasons_get(void *data, u64 *val)
> > +{
> > +   struct intel_gt *gt = data;
> > +   intel_wakeref_t wakeref;
> > +
> > +   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > +   *val = intel_uncore_read(gt->uncore, GT0_PERF_LIMIT_REASONS);
> > +
> > +   return 0;
> > +}
> > +
> > +static int perf_limit_reasons_clear(void *data, u64 val)
> > +{
> > +   struct intel_gt *gt = data;
> > +   intel_wakeref_t wakeref;
> > +
> > +   /* Clear the upper 16 log bits, the lower 16 status bits are read-only 
> > */
> > +   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
> > +   intel_uncore_rmw(gt->uncore, GT0_PERF_LIMIT_REASONS,
> > +GT0_PERF_LIMIT_REASONS_LOG_MASK, 0);
> > +
> > +   return 0;
> > +}
> > +DEFINE_SIMPLE_ATTRIBUTE(perf_limit_reasons_fops, perf_limit_reasons_get,
> > +   perf_limit_reasons_clear, "%llu\n");
> > +
> >  void intel_gt_pm_debugfs_register(struct intel_gt *gt, struct dentry *root)
> >  {
> > static const struct intel_gt_debugfs_file files[] = {
> > @@ -664,6 +690,7 @@ void intel_gt_pm_debugfs_register(struct intel_gt *gt, 
> > struct dentry *root)
> > { "forcewake_user", &forcewake_user_fops, NULL},
> > { "llc", &llc_fops, llc_eval },
> > { "rps_boost", &rps_boost_fops, rps_eval },
> > +   { "perf_limit_reasons", &perf_limit_reasons_fops, NULL },
> > };
> >
> > intel_gt_debugfs_register_files(root, files, ARRAY_SIZE(files), gt);
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 5e6239864c35..10126995e1f6 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -1802,6 +1802,7 @@
> >  #define   POWER_LIMIT_4_MASK   REG_BIT(9)
> >  #define   POWER_LIMIT_1_MASK   REG_BIT(11)
> >  #define   POWER_LIMIT_2_MASK   REG_BIT(12)
> > +#define   GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16)
>
> Is this valid for all platforms?

Yes, looks like it.

> What does the bits are really telling us?

The v1 commit message above hinted at what was happening, I've clarified
the commit message in v2 as follows:

Add perf_limit_reasons in debugfs. The upper 16 perf_limit_reasons RW "log"
bits are identical to the lower 16 RO "status" bits except that the "log"
bits remain set until cleared, thereby ensuring the throttling occurrence
is not missed. The clear fop clears the upper 16 "log" bits, the get fop
gets all 32 "log" and "status" bits.

I've also expanded the comment in perf_limit_reasons_clear() to explain this.

> Could we expand the reasons? The previous bits we know exactly
> what kind of limits we are dealing of, but with this combined
> one without any explanation I'm afraid this will bring more
> confusion than help. We will get bugged by many folks trying
> to debug this out there when bit 13, for instance, is set.
> "What does bit 13 mean?" will be a recurrent question with
> only a tribal knowledge kind of answer.

I think the new commit message above and comment has the answer to this
now. Also, won't there be a public copy of the Bspec where someone can look
up the bit definitions?

Also, are these "log" bits useful enough to expose them in sysfs like we
have the lower "status" bits exposed today but that is probably the
question for a different patch.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 5/8] drm/i915/gt: Fix perf limit reasons bit positions

2022-09-08 Thread Dixit, Ashutosh
On Thu, 08 Sep 2022 05:37:08 -0700, Sundaresan, Sujaritha wrote:
>
> On 9/8/2022 4:12 PM, Andi Shyti wrote:
> > Hi,
> >
> > On Wed, Sep 07, 2022 at 10:21:53PM -0700, Ashutosh Dixit wrote:
> >> Perf limit reasons bit positions were off by one.
> >>
> >> Fixes: fa68bff7cf27 ("drm/i915/gt: Add sysfs throttle frequency 
> >> interfaces")
> >> Cc: sta...@vger.kernel.org # v5.18+
> >> Cc: Sujaritha Sundaresan 
> >> Cc: Andi Shyti 
> >> Signed-off-by: Ashutosh Dixit 
> > Thanks Ashutosh!
> >
> >> ---
> >>   drivers/gpu/drm/i915/i915_reg.h | 16 
> >>   1 file changed, 8 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> >> b/drivers/gpu/drm/i915/i915_reg.h
> >> index c413eec3373f..24009786f88b 100644
> >> --- a/drivers/gpu/drm/i915/i915_reg.h
> >> +++ b/drivers/gpu/drm/i915/i915_reg.h
> >> @@ -1794,14 +1794,14 @@
> >> #define GT0_PERF_LIMIT_REASONS _MMIO(0x1381a8)
> >>   #define   GT0_PERF_LIMIT_REASONS_MASK0xde3
> >> -#define   PROCHOT_MASKREG_BIT(1)
> >> -#define   THERMAL_LIMIT_MASK  REG_BIT(2)
> >> -#define   RATL_MASK   REG_BIT(6)
> >> -#define   VR_THERMALERT_MASK  REG_BIT(7)
> >> -#define   VR_TDC_MASK REG_BIT(8)
> >> -#define   POWER_LIMIT_4_MASK  REG_BIT(9)
> >> -#define   POWER_LIMIT_1_MASK  REG_BIT(11)
> >> -#define   POWER_LIMIT_2_MASK  REG_BIT(12)
> >> +#define   PROCHOT_MASKREG_BIT(0)
> >> +#define   THERMAL_LIMIT_MASK  REG_BIT(1)
> >> +#define   RATL_MASK   REG_BIT(5)
> >> +#define   VR_THERMALERT_MASK  REG_BIT(6)
> >> +#define   VR_TDC_MASK REG_BIT(7)
> >> +#define   POWER_LIMIT_4_MASK  REG_BIT(8)
> >> +#define   POWER_LIMIT_1_MASK  REG_BIT(10)
> >> +#define   POWER_LIMIT_2_MASK  REG_BIT(11)
> > Sujaritha, could you please check and r-b this one?
> >
> > Thanks,
> > Andi
>
> Looks good. I've checked the reg bits.
>
> Reviewed-by : Sujaritha Sundaresan 
 ^
I will fix it so no need to resend but just FYI Patchwork doesn't like the
extra space above so doesn't register the R-b...


Re: [Intel-gfx] [PATCH] drm/i915/gt: Fix perf limit reasons bit positions

2022-09-08 Thread Dixit, Ashutosh
On Thu, 08 Sep 2022 08:58:21 -0700, Ashutosh Dixit wrote:
>
> Perf limit reasons bit positions were off by one.
>
> Fixes: fa68bff7cf27 ("drm/i915/gt: Add sysfs throttle frequency interfaces")
> Cc: sta...@vger.kernel.org # v5.18+
> Signed-off-by: Ashutosh Dixit 
> Acked-by: Andi Shyti 
> Reviewed-by: Sujaritha Sundaresan 

I copied the A-b and R-b on this patch from:

https://patchwork.freedesktop.org/patch/501919/?series=108091&rev=2

And have also copied stable just in case. Thanks.

> ---
>  drivers/gpu/drm/i915/i915_reg.h | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index c413eec3373f..24009786f88b 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1794,14 +1794,14 @@
>
>  #define GT0_PERF_LIMIT_REASONS   _MMIO(0x1381a8)
>  #define   GT0_PERF_LIMIT_REASONS_MASK0xde3
> -#define   PROCHOT_MASK   REG_BIT(1)
> -#define   THERMAL_LIMIT_MASK REG_BIT(2)
> -#define   RATL_MASK  REG_BIT(6)
> -#define   VR_THERMALERT_MASK REG_BIT(7)
> -#define   VR_TDC_MASKREG_BIT(8)
> -#define   POWER_LIMIT_4_MASK REG_BIT(9)
> -#define   POWER_LIMIT_1_MASK REG_BIT(11)
> -#define   POWER_LIMIT_2_MASK REG_BIT(12)
> +#define   PROCHOT_MASK   REG_BIT(0)
> +#define   THERMAL_LIMIT_MASK REG_BIT(1)
> +#define   RATL_MASK  REG_BIT(5)
> +#define   VR_THERMALERT_MASK REG_BIT(6)
> +#define   VR_TDC_MASKREG_BIT(7)
> +#define   POWER_LIMIT_4_MASK REG_BIT(8)
> +#define   POWER_LIMIT_1_MASK REG_BIT(10)
> +#define   POWER_LIMIT_2_MASK REG_BIT(11)
>
>  #define CHV_CLK_CTL1 _MMIO(0x101100)
>  #define VLV_CLK_CTL2 _MMIO(0x101104)
> --
> 2.34.1
>


Re: [Intel-gfx] [PATCH 6/8] drm/i915/debugfs: Add perf_limit_reasons in debugfs

2022-09-09 Thread Dixit, Ashutosh
On Fri, 09 Sep 2022 03:13:05 -0700, Rodrigo Vivi wrote:
>
> On Wed, Sep 07, 2022 at 10:22:49PM -0700, Ashutosh Dixit wrote:
> > From: Tilak Tangudu 
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 24009786f88b..9492f8f43b25 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -1802,6 +1802,7 @@
> >  #define   POWER_LIMIT_4_MASK   REG_BIT(8)
> >  #define   POWER_LIMIT_1_MASK   REG_BIT(10)
> >  #define   POWER_LIMIT_2_MASK   REG_BIT(11)
> > +#define   GT0_PERF_LIMIT_REASONS_LOG_MASK REG_GENMASK(31, 16)
> >
>
> I'm kind of confused here because I saw the other bits in the patch 5.

Sorry Rodrigo, patch 5 is a bug-fix patch which should probably be merged
to -fixes independent of this series, I have posted it independently too:

https://patchwork.freedesktop.org/series/108277/

I was debating including patch 5 as part of this series but then it was
touching the same code so I ended up including it. Sorry for the confusion.

> but, anyway, thanks for improving the commit msg.
>
> Reviewed-by: Rodrigo Vivi 

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-09 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> With GuC mode of submission, GuC is in control of defining the context id 
> field
> that is part of the OA reports. To filter reports, UMD and KMD must know what 
> sw
> context id was chosen by GuC. There is not interface between KMD and GuC to
> determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
> reports for the specific context.

Do you think it is worth defining an interface for GuC to return the sw
ctx_id it will be using for a ctx, say at ctx registration time?

The scheme implemented in this patch to read the ctx_id is certainly very
clever, at least to me. But as Lionel was saying is it a agreed upon
immutable interface? If it is, we can go with this patch.

(Though even then we will need to maintain this code even if in the future
GuC FW is changed to return the ctx_id in order to preserve backwards
comptability with previous GuC versions. So maybe better to have a real
interface between GuC and KMD earlier rather than later?).

Also a couple of general comments below.

>
> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>  drivers/gpu/drm/i915/i915_perf.c| 141 
>  2 files changed, 124 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h 
> b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a390f0813c8b..7111bae759f3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -110,6 +110,8 @@ enum {
>  #define XEHP_SW_CTX_ID_WIDTH 16
>  #define XEHP_SW_COUNTER_SHIFT58
>  #define XEHP_SW_COUNTER_WIDTH6
> +#define GEN12_GUC_SW_CTX_ID_SHIFT39
> +#define GEN12_GUC_SW_CTX_ID_WIDTH16
>
>  static inline void lrc_runtime_start(struct intel_context *ce)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index f3c23fe9ad9c..735244a3aedd 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1233,6 +1233,125 @@ static struct intel_context *oa_pin_context(struct 
> i915_perf_stream *stream)
>   return stream->pinned_ctx;
>  }
>
> +static int
> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
> +{
> + u32 *cs, cmd;
> +
> + cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> + if (GRAPHICS_VER(rq->engine->i915) >= 8)
> + cmd++;
> +
> + cs = intel_ring_begin(rq, 4);
> + if (IS_ERR(cs))
> + return PTR_ERR(cs);
> +
> + *cs++ = cmd;
> + *cs++ = i915_mmio_reg_offset(reg);
> + *cs++ = ggtt_offset;
> + *cs++ = 0;
> +
> + intel_ring_advance(rq, cs);
> +
> + return 0;
> +}
> +
> +static int
> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
> +{
> + struct i915_request *rq;
> + int err;
> +
> + rq = i915_request_create(ce);
> + if (IS_ERR(rq))
> + return PTR_ERR(rq);
> +
> + i915_request_get(rq);
> +
> + err = __store_reg_to_mem(rq, reg, ggtt_offset);
> +
> + i915_request_add(rq);
> + if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
> + err = -ETIME;
> +
> + i915_request_put(rq);
> +
> + return err;
> +}
> +
> +static int
> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
> +{
> + struct i915_vma *scratch;
> + u32 *val;
> + int err;
> +
> + scratch = 
> __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
> + if (IS_ERR(scratch))
> + return PTR_ERR(scratch);
> +
> + err = i915_vma_sync(scratch);
> + if (err)
> + goto err_scratch;
> +
> + err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
> +  i915_ggtt_offset(scratch));

Actually the RING_EXECLIST_STATUS_HI is MMIO so can be read using say
ENGINE_READ/intel_uncore_read. The only issue is how to read it when this
ctx is scheduled which is cleverly solved by the scheme above. But I am not
sure if there is any other simpler way to do it.

> + if (err)
> + goto err_scratch;
> +
> + val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
> + if (IS_ERR(val)) {
> + err = PTR_ERR(val);
> + goto err_scratch;
> + }
> +
> + *ctx_id = *val;
> + i915_gem_object_unpin_map(scratch->obj);
> +
> +err_scratch:
> + i915_vma_unpin_and_release(&scratch, 0);
> + return err;
> +}
> +
> +/*
> + * For execlist mode of submission, pick an unused context id
> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> + *
> + * For GuC mode of submission read context id from the upper dword of the
> + * EXECLIST_STATUS register.
> + */
> +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> +{
> + u32 ctx_id, mask;
> + int

Re: [Intel-gfx] [PATCH 4/6] drm/i915: Use GEN12 RPSTAT register

2022-09-09 Thread Dixit, Ashutosh
On Thu, 08 Sep 2022 19:56:44 -0700, Badal Nilawar wrote:
>
> From: Don Hiatt 
>
> On GEN12, use the correct GEN12 RPSTAT register mask/shift.
>
> HSD: 1409538411

I think let's remove this.

> Cc: Don Hiatt 
> Cc: Andi Shyti 
> Signed-off-by: Don Hiatt 
> Signed-off-by: Badal Nilawar 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c |  2 +-
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   |  5 +
>  drivers/gpu/drm/i915/gt/intel_rps.c   | 17 -
>  drivers/gpu/drm/i915/gt/intel_rps.h   |  1 +
>  drivers/gpu/drm/i915/i915_pmu.c   |  3 +--
>  5 files changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> index 108b9e76c32e..96c03a1258e1 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> @@ -380,7 +380,7 @@ void intel_gt_pm_frequency_dump(struct intel_gt *gt, 
> struct drm_printer *p)
>   rpinclimit = intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD);
>   rpdeclimit = intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD);
>
> - rpstat = intel_uncore_read(uncore, GEN6_RPSTAT1);
> + rpstat = intel_rps_read_rpstat(rps);
>   rpcurupei = intel_uncore_read(uncore, GEN6_RP_CUR_UP_EI) & 
> GEN6_CURICONT_MASK;
>   rpcurup = intel_uncore_read(uncore, GEN6_RP_CUR_UP) & 
> GEN6_CURBSYTAVG_MASK;
>   rpprevup = intel_uncore_read(uncore, GEN6_RP_PREV_UP) & 
> GEN6_CURBSYTAVG_MASK;
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index fb2c56777480..dac59c3e68db 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1510,6 +1510,11 @@
>  #define VLV_RENDER_C0_COUNT  _MMIO(0x138118)
>  #define VLV_MEDIA_C0_COUNT   _MMIO(0x13811c)
>
> +#define GEN12_RPSTAT1_MMIO(0x1381b4)
> +#define   GEN12_CAGF_SHIFT   11
> +#define   GEN12_CAGF_MASKREG_GENMASK(19, 11)
> +#define   GEN12_VOLTAGE_MASK REG_GENMASK(10, 0)

Let's remove GEN12_VOLTAGE_MASK, looks like it's not being used.

> +
>  #define GEN11_GT_INTR_DW(x)  _MMIO(0x190018 + ((x) * 4))
>  #define   GEN11_CSME (31)
>  #define   GEN11_GUNIT(28)
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c 
> b/drivers/gpu/drm/i915/gt/intel_rps.c
> index 6fadde4ee7bf..341f96f536e8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.c
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.c
> @@ -2040,6 +2040,19 @@ void intel_rps_sanitize(struct intel_rps *rps)
>   rps_disable_interrupts(rps);
>  }
>
> +u32 intel_rps_read_rpstat(struct intel_rps *rps)
> +{
> + struct drm_i915_private *i915 = rps_to_i915(rps);
> + u32 rpstat;
> +
> + if (GRAPHICS_VER(i915) >= 12)
> + rpstat = intel_uncore_read(rps_to_gt(rps)->uncore, 
> GEN12_RPSTAT1);
> + else
> + rpstat = intel_uncore_read(rps_to_gt(rps)->uncore, 
> GEN6_RPSTAT1);

Probably nit but how about something like this:

i915_reg_t rpstat;

if (GRAPHICS_VER(i915) >= 12)
rpstat = GEN12_RPSTAT1;
else
rpstat = GEN6_RPSTAT1;

return intel_uncore_read(rps_to_gt(rps)->uncore, rpstat);
> +
> + return rpstat;
> +}
> +
>  u32 intel_rps_get_cagf(struct intel_rps *rps, u32 rpstat)
>  {
>   struct drm_i915_private *i915 = rps_to_i915(rps);
> @@ -2047,6 +2060,8 @@ u32 intel_rps_get_cagf(struct intel_rps *rps, u32 
> rpstat)
>
>   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>   cagf = (rpstat >> 8) & 0xff;
> + else if (GRAPHICS_VER(i915) >= 12)
> + cagf = (rpstat & GEN12_CAGF_MASK) >> GEN12_CAGF_SHIFT;
>   else if (GRAPHICS_VER(i915) >= 9)
>   cagf = (rpstat & GEN9_CAGF_MASK) >> GEN9_CAGF_SHIFT;
>   else if (IS_HASWELL(i915) || IS_BROADWELL(i915))
> @@ -2071,7 +2086,7 @@ static u32 read_cagf(struct intel_rps *rps)
>   freq = vlv_punit_read(i915, PUNIT_REG_GPU_FREQ_STS);
>   vlv_punit_put(i915);
>   } else if (GRAPHICS_VER(i915) >= 6) {
> - freq = intel_uncore_read(uncore, GEN6_RPSTAT1);
> + freq = intel_rps_read_rpstat(rps);
>   } else {
>   freq = intel_uncore_read(uncore, MEMSTAT_ILK);
>   }
> diff --git a/drivers/gpu/drm/i915/gt/intel_rps.h 
> b/drivers/gpu/drm/i915/gt/intel_rps.h
> index 4509dfdc52e0..08bae6d97870 100644
> --- a/drivers/gpu/drm/i915/gt/intel_rps.h
> +++ b/drivers/gpu/drm/i915/gt/intel_rps.h
> @@ -47,6 +47,7 @@ u32 intel_rps_get_rp1_frequency(struct intel_rps *rps);
>  u32 intel_rps_get_rpn_frequency(struct intel_rps *rps);
>  u32 intel_rps_read_punit_req(struct intel_rps *rps);
>  u32 intel_rps_read_punit_req_frequency(struct intel_

Re: [Intel-gfx] [PATCH 6/6] drm/i915/mtl: Add C6 residency support for MTL SAMedia

2022-09-09 Thread Dixit, Ashutosh
On Thu, 08 Sep 2022 19:56:46 -0700, Badal Nilawar wrote:
>
> For MTL SAMedia updated relevant functions and places in the code to get
> Media C6 residency.
>
> Cc: Vinay Belgaumkar 
> Cc: Ashutosh Dixit 
> Cc: Chris Wilson 
> Signed-off-by: Badal Nilawar 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 56 +++
>  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 10 
>  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   |  9 ++-
>  drivers/gpu/drm/i915/gt/intel_rc6.c   |  5 +-
>  drivers/gpu/drm/i915/gt/selftest_rc6.c|  9 ++-
>  drivers/gpu/drm/i915/i915_pmu.c   |  8 ++-
>  6 files changed, 93 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> index 96c03a1258e1..6913c0a2ba33 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> @@ -269,6 +269,60 @@ static int ilk_drpc(struct seq_file *m)
>   return 0;
>  }
>
> +static int mtl_drpc(struct seq_file *m)
> +{
> + struct intel_gt *gt = m->private;
> + struct intel_uncore *uncore = gt->uncore;
> + u32 gt_core_status, rcctl1;
> + u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
> + i915_reg_t reg;
> +

Comparing with gen6_drpc do we need to add this here:

mt_fwake_req = intel_uncore_read_fw(uncore, FORCEWAKE_MT);

And show it later as in gen6_drpc?

> + gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
> +
> + rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
> + mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
> + mtl_powergate_status = intel_uncore_read(uncore,
> +  GEN9_PWRGT_DOMAIN_STATUS);
> +
> + seq_printf(m, "RC6 Enabled: %s\n",
> +str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
> + if (gt->type == GT_MEDIA) {
> + seq_printf(m, "Media Well Gating Enabled: %s\n",
> +str_yes_no(mtl_powergate_enable & 
> GEN9_MEDIA_PG_ENABLE));
> + } else {
> + seq_printf(m, "Render Well Gating Enabled: %s\n",
> +str_yes_no(mtl_powergate_enable & 
> GEN9_RENDER_PG_ENABLE));
> + }
> +
> + seq_puts(m, "Current RC state: ");
> +
> + switch ((gt_core_status & MTL_CC_MASK) >> MTL_CC_SHIFT) {
> + case MTL_CC0:
> + seq_puts(m, "on\n");
> + break;
> + case MTL_CC6:
> + seq_puts(m, "RC6\n");
> + break;
> + default:
> + seq_puts(m, "Unknown\n");
> + break;
> + }
> +
> + if (gt->type == GT_MEDIA)
> + seq_printf(m, "Media Power Well: %s\n",
> +(mtl_powergate_status &
> + GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
> + else
> + seq_printf(m, "Render Power Well: %s\n",
> +(mtl_powergate_status &
> + GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
> +
> + reg = (gt->type == GT_MEDIA) ? MTL_MEDIA_MC6 : GEN6_GT_GFX_RC6;
> + print_rc6_res(m, "RC6 residency since boot:", reg);
> +
> + return fw_domains_show(m, NULL);
> +}
> +
>  static int drpc_show(struct seq_file *m, void *unused)
>  {
>   struct intel_gt *gt = m->private;
> @@ -279,6 +333,8 @@ static int drpc_show(struct seq_file *m, void *unused)
>   with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>   if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
>   err = vlv_drpc(m);
> + else if (MEDIA_VER(i915) >= 13)

Why a MEDIA_VER check here? Should we use 'if (GRAPHICS_VER_FULL(i915) >=
IP_VER(12, 70))' or 'IS_METEORLAKE'?

> + err = mtl_drpc(m);
>   else if (GRAPHICS_VER(i915) >= 6)
>   err = gen6_drpc(m);
>   else
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> index ab9a5e66ab34..2c6cf29888e0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> @@ -1522,6 +1522,16 @@
>   */
>  #define MTL_MIRROR_TARGET_WP1  _MMIO(0x0C60)
>  #define   MTL_CAGF_MASKREG_GENMASK(8, 0)
> +#define   MTL_CC0  0x0
> +#define   MTL_CC6  0x3
> +#define   MTL_CC_SHIFT 9
> +#define   MTL_CC_MASK  (0xf << MTL_CC_SHIFT)

Let's remove MTL_CC_SHIFT and do

#define   MTL_CC_MASK   REG_GENMASK(12, 9)

Above where we are using these just use REG_FIELD_GET(MTL_CC_MASK,
gt_core_status).

> +
> +/*
> + * MTL: This register contains the total MC6 residency time that SAMedia was
> + * since boot
> + */
> +#define MTL_MEDIA_MC6  _MMIO(0x138048)
>
>  #define GEN11_GT_INTR_DW(x)  _MMIO(0x190018 + ((x) * 4))
>  #define   GEN11_CSME  

Re: [Intel-gfx] [PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage support

2022-09-12 Thread Dixit, Ashutosh
On Mon, 12 Sep 2022 07:09:28 -0700, Gupta, Anshuman wrote:
>
> > +static int
> > +hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val) {
> > +   struct i915_hwmon *hwmon = ddat->hwmon;
> > +   intel_wakeref_t wakeref;
> > +   u32 reg_value;
> > +
> > +   switch (attr) {
> > +   case hwmon_in_input:
>
> Other attributes in this series take hwmon->lock before accessing i915
> registers , So do we need lock here as well ?

The lock is being taken only for RMW and for making sure energy counter
updates happen atomically. We are not taking the lock for just reads so IMO
no lock is needed here.

> > +   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > +   reg_value = intel_uncore_read(ddat->uncore, hwmon-
> > >rg.gt_perf_status);
> > +   /* In units of 2.5 millivolt */
> > +   *val =
> > DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) *
> > 25, 10);
> > +   return 0;
> > +   default:
> > +   return -EOPNOTSUPP;
> > +   }
> > +}

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 4/6] drm/i915: Use GEN12 RPSTAT register

2022-09-12 Thread Dixit, Ashutosh
On Mon, 12 Sep 2022 04:29:38 -0700, Nilawar, Badal wrote:
>
> >> diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
> >> b/drivers/gpu/drm/i915/i915_pmu.c
> >> index 958b37123bf1..a24704ec2c18 100644
> >> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >> @@ -371,7 +371,6 @@ static void
> >>   frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >>   {
> >>struct drm_i915_private *i915 = gt->i915;
> >> -  struct intel_uncore *uncore = gt->uncore;
> >>struct i915_pmu *pmu = &i915->pmu;
> >>struct intel_rps *rps = >->rps;
> >>
> >> @@ -394,7 +393,7 @@ frequency_sample(struct intel_gt *gt, unsigned int 
> >> period_ns)
> >> * case we assume the system is running at the intended
> >> * frequency. Fortunately, the read should rarely fail!
> >> */
> >> -  val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
> >> +  val = intel_rps_read_rpstat(rps);
> >
> > Hmm, we got rid of _fw which the comment above refers to. Maybe we need a
> > fw flag to intel_rps_read_rpstat?
>
> Above function before reading rpstat it checks if gt is awake.

Ok, so you are referring to intel_gt_pm_get_if_awake check in frequency_sample.

> So when gt is awake shouldn't matter if we read GEN6_RPSTAT1 with
> forcewake.In that case we can remove above comment.  Let me know your
> thoughts on this.

I am not entirely sure about this. For example in c1c82d267ae8
intel_uncore_read_fw was introduced with the same intel_gt_pm_get_if_awake
check. So this would mean even if gt is awake not taking forcewake makes a
difference. The same code pattern was retained in b66ecd0438bf. Maybe it's
because there are no locks?

Under the circumstances I think we could do one of two things:
1. If we want to drop _fw, we should do it as a separate patch with its own
   justification so it can be reviewed separately.
2. Otherwise as I mentioned we should retain the _fw and add a fw flag to
   intel_rps_read_rpstat.

Thanks.
--
Ashutosh

> >>if (val)
> >>val = intel_rps_get_cagf(rps, val);
> >>else
> >> --
> >> 2.25.1
> >>


Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-12 Thread Dixit, Ashutosh
On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
>
> On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> >
>
> Hi Umesh,
>
> > With GuC mode of submission, GuC is in control of defining the context id 
> > field
> > that is part of the OA reports. To filter reports, UMD and KMD must know 
> > what sw
> > context id was chosen by GuC. There is not interface between KMD and GuC to
> > determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash 
> > OA
> > reports for the specific context.
>
> Do you think it is worth defining an interface for GuC to return the sw
> ctx_id it will be using for a ctx, say at ctx registration time?

Umesh, I came across these in GuC documentation:

guc_pcv1_context_parameters_set_h2g_data_t::context_id
guc_pcv2_context_parameters_set_h2g_data_t::context_id

Also in the code we have in prepare_context_registration_info_v70 'ctx_id =
ce->guc_id.id' which seems to be assigned in new_guc_id. So wondering if
this is what we need and we already have it?

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage support

2022-09-13 Thread Dixit, Ashutosh
On Tue, 13 Sep 2022 01:11:57 -0700, Gupta, Anshuman wrote:
>

Hi Anshuman,

> > -Original Message-
> > From: Dixit, Ashutosh 
> > Sent: Monday, September 12, 2022 10:08 PM
> > To: Gupta, Anshuman 
> > Cc: Nilawar, Badal ; 
> > intel-gfx@lists.freedesktop.org;
> > Tauro, Riana ; Ewins, Jon ;
> > linux-hw...@vger.kernel.org
> > Subject: Re: [PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage
> > support
> >
> > On Mon, 12 Sep 2022 07:09:28 -0700, Gupta, Anshuman wrote:
> > >
> > > > +static int
> > > > +hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val) {
> > > > +   struct i915_hwmon *hwmon = ddat->hwmon;
> > > > +   intel_wakeref_t wakeref;
> > > > +   u32 reg_value;
> > > > +
> > > > +   switch (attr) {
> > > > +   case hwmon_in_input:
> > >
> > > Other attributes in this series take hwmon->lock before accessing i915
> > > registers , So do we need lock here as well ?
> >
> > The lock is being taken only for RMW and for making sure energy counter
> > updates happen atomically. We are not taking the lock for just reads so IMO 
> > no
> > lock is needed here.
>
> If that is the case, then why it needs to grab a lock for rmw on
> different Register ? Like in patch 3 of this series grabs
> hwmon->howmon_lock for rmw on two different register pkg_power_sku_unit
> and pkg_rapl_limit.

I don't see this happening, where do you see it?

> One register rmw should be independent of other register here ?

Yes each register RMW is independent. In Patch 3 only hwm_power_write (not
hwm_power_read) is taking the lock for RMW on pkg_rapl_limit (lock is not
taken for pkg_power_sku_unit). So the assumption is that RMW of a single
register are not atomic and must be serialized with a lock. Reads are
considered to not need a lock.

Thanks.
--
Ashutosh


> >
> > > > +   with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> > > > +   reg_value = intel_uncore_read(ddat->uncore, 
> > > > hwmon-
> > > > >rg.gt_perf_status);
> > > > +   /* In units of 2.5 millivolt */
> > > > +   *val =
> > > > DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value)
> > * 25,
> > > > 10);
> > > > +   return 0;
> > > > +   default:
> > > > +   return -EOPNOTSUPP;
> > > > +   }
> > > > +}
> >
> > Thanks.
> > --
> > Ashutosh


Re: [Intel-gfx] [PATCH 02/19] drm/i915/perf: Add OA formats for DG2

2022-09-13 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:38 -0700, Umesh Nerlige Ramappa wrote:
>
> Add new OA formats for DG2.

Should we change the patch title and commit message a bit to 'Add OAR and
OAG formats for DG2'?

> Some of the newer OA formats are not
> multples of 64 bytes and are not powers of 2. For those formats, adjust
> hw_tail accordingly when checking for new reports.
>
> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 63 
>  include/uapi/drm/i915_drm.h  |  6 +++
>  2 files changed, 46 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 735244a3aedd..c8331b549d31 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -306,7 +306,8 @@ static u32 i915_oa_max_sample_rate = 10;
>
>  /* XXX: beware if future OA HW adds new report formats that the current
>   * code assumes all reports have a power-of-two size and ~(size - 1) can
> - * be used as a mask to align the OA tail pointer.
> + * be used as a mask to align the OA tail pointer. In some of the
> + * formats, R is used to denote reserved field.
>   */
>  static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>   [I915_OA_FORMAT_A13]= { 0, 64 },
> @@ -320,6 +321,10 @@ static const struct i915_oa_format 
> oa_formats[I915_OA_FORMAT_MAX] = {
>   [I915_OA_FORMAT_A12]= { 0, 64 },
>   [I915_OA_FORMAT_A12_B8_C8]  = { 2, 128 },
>   [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
> + [I915_OAR_FORMAT_A32u40_A4u32_B8_C8]= { 5, 256 },
> + [I915_OA_FORMAT_A24u40_A14u32_B8_C8]= { 5, 256 },
> + [I915_OAR_FORMAT_A36u64_B8_C8]  = { 1, 384 },
> + [I915_OA_FORMAT_A38u64_R2u64_B8_C8] = { 1, 448 },

Isn't the size for this last one 416 (or 400)? Bspec: 52198. Unless the
size has to be a multiple of 64?

Looks like Lionel's R-b is not showing up on Patchwork, might need to be
manually added. For now this is:

Acked-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 05/19] drm/i915/perf: Enable commands per clock reporting in OA

2022-09-13 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:41 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> XEHPSDV and DG2 provide a way to configure bytes per clock vs commands
> per clock reporting. Enable command per clock setting on enabling OA.

What is the reason for selecting commands per clock vs bytes per clock?
Also probably mention Bspec: 51762 in the commit message too.

> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index efa7eda83edd..6fc4f0d8fc5a 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2745,10 +2745,12 @@ static int
>  gen12_enable_metric_set(struct i915_perf_stream *stream,
>   struct i915_active *active)
>  {
> + struct drm_i915_private *i915 = stream->perf->i915;
>   struct intel_uncore *uncore = stream->uncore;
>   struct i915_oa_config *oa_config = stream->oa_config;
>   bool periodic = stream->periodic;
>   u32 period_exponent = stream->period_exponent;
> + u32 sqcnt1;
>   int ret;
>
>   intel_uncore_write(uncore, GEN12_OAG_OA_DEBUG,
> @@ -2767,6 +2769,16 @@ gen12_enable_metric_set(struct i915_perf_stream 
> *stream,
>   (period_exponent << 
> GEN12_OAG_OAGLBCTXCTRL_TIMER_PERIOD_SHIFT))
>   : 0);
>
> + /*
> +  * Initialize Super Queue Internal Cnt Register
> +  * Set PMON Enable in order to collect valid metrics.
> +  * Enable commands per clock reporting in OA for XEHPSDV onward.
> +  */
> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +  (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);

Also from Bspec 0:Unitsof4cmd and 1:Unitsof128B so looks like bit 29 should
be set to 0 for commands per clock setting? Or I am wrong?

> +
> + intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1);
> +
>   /*
>* Update all contexts prior writing the mux configurations as we need
>* to make sure all slices/subslices are ON before writing to NOA
> @@ -2816,6 +2828,8 @@ static void gen11_disable_metric_set(struct 
> i915_perf_stream *stream)
>  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>  {
>   struct intel_uncore *uncore = stream->uncore;
> + struct drm_i915_private *i915 = stream->perf->i915;
> + u32 sqcnt1;
>
>   /* Reset all contexts' slices/subslices configurations. */
>   gen12_configure_all_contexts(stream, NULL, NULL);
> @@ -2826,6 +2840,12 @@ static void gen12_disable_metric_set(struct 
> i915_perf_stream *stream)
>
>   /* Make sure we disable noa to save power. */
>   intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> +
> + sqcnt1 = GEN12_SQCNT1_PMON_ENABLE |
> +  (HAS_OA_BPC_REPORTING(i915) ? GEN12_SQCNT1_OABPC : 0);
> +
> + /* Reset PMON Enable to save power. */
> + intel_uncore_rmw(uncore, GEN12_SQCNT1, sqcnt1, 0);
>  }
>
>  static void gen7_oa_enable(struct i915_perf_stream *stream)
> diff --git a/drivers/gpu/drm/i915/i915_perf_oa_regs.h 
> b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> index 0ef3562ff4aa..381d94101610 100644
> --- a/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> +++ b/drivers/gpu/drm/i915/i915_perf_oa_regs.h
> @@ -134,4 +134,8 @@
>  #define GDT_CHICKEN_BITS_MMIO(0x9840)
>  #define   GT_NOA_ENABLE  0x0080
>
> +#define GEN12_SQCNT1 _MMIO(0x8718)
> +#define   GEN12_SQCNT1_PMON_ENABLE   REG_BIT(30)
> +#define   GEN12_SQCNT1_OABPC REG_BIT(29)
> +
>  #endif /* __INTEL_PERF_OA_REGS__ */


Re: [Intel-gfx] [PATCH 06/19] drm/i915/perf: Use helpers to process reports w.r.t. OA buffer size

2022-09-14 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:42 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 6fc4f0d8fc5a..bbf1c574f393 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -385,6 +385,21 @@ static struct ctl_table_header *sysctl_header;
>
>  static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
>
> +static u32 _oa_taken(struct i915_perf_stream * stream, u32 tail, u32 head)

nit: no space between * and stream.

> +{
> + u32 size = stream->oa_buffer.vma->size;
> +
> + return tail >= head ? tail - head : size - (head - tail);
> +}

If we are doing this we should probably eliminate references to OA_TAKEN
which serves an identical purpose (I think there is one remaining
reference) and also delete OA_TAKEN #define.

> +
> +static u32 _rewind_tail(struct i915_perf_stream * stream, u32 
> relative_hw_tail,
> + u32 rewind_delta)
> +{
> + return rewind_delta > relative_hw_tail ?
> +stream->oa_buffer.vma->size - (rewind_delta - relative_hw_tail) :
> +relative_hw_tail - rewind_delta;
> +}

Also are we really saying here that we are supporting non-power-of-2 OA
buffer sizes? Because if we stayed with power-of-2 sizes the expression
above are nice and elegant and actually closer to the previous code being
changed in this patch. For example:

#include 

static u32 _oa_taken(struct i915_perf_stream *stream, u32 tail, u32 head)
{
return CIRC_CNT(tail, head, stream->oa_buffer.vma->size);
}

static u32 _rewind_tail(struct i915_perf_stream *stream, u32 relative_hw_tail,
u32 rewind_delta)
{
return CIRC_CNT(relative_hw_tail, rewind_delta, 
stream->oa_buffer.vma->size);
}

Note that for power-of-2 sizes the two functions above are identical but we
should keep them separate for clarity (as is done in the patch) since they
are serving two different functions in the OA code.

Also another assumption in the code seems to be:

stream->oa_buffer.vma->size == OA_BUFFER_SIZE

which I am pretty sure will not hold for arbitrary non-power-of-2 OA buffer
sizes? So we might as well stick with power-of-2 sizes and change later in
a separate patch only if needed?

Thanks.
--
Ashutosh

> +
>  void i915_oa_config_release(struct kref *ref)
>  {
>   struct i915_oa_config *oa_config =
> @@ -487,12 +502,14 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
>* sizes need not be integral multiples or 64 or powers of 2.
>* Compute potentially partially landed report in the OA buffer
>*/
> - partial_report_size = OA_TAKEN(hw_tail, stream->oa_buffer.tail);
> + partial_report_size =
> + _oa_taken(stream, hw_tail, stream->oa_buffer.tail);
>   partial_report_size %= report_size;
>
>   /* Subtract partial amount off the tail */
> - hw_tail = gtt_offset + ((hw_tail - partial_report_size) &
> - (stream->oa_buffer.vma->size - 1));
> + hw_tail = gtt_offset + _rewind_tail(stream,
> + hw_tail - gtt_offset,
> + partial_report_size);
>
>   now = ktime_get_mono_fast_ns();
>
> @@ -527,16 +544,16 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
>* memory in the order they were written to.
>* If not : (╯°□°)╯︵ ┻━┻
>*/
> - while (OA_TAKEN(tail, aged_tail) >= report_size) {
> + while (_oa_taken(stream, tail, aged_tail) >= report_size) {
>   u32 *report32 = (void *)(stream->oa_buffer.vaddr + 
> tail);
>
>   if (report32[0] != 0 || report32[1] != 0)
>   break;
>
> - tail = (tail - report_size) & (OA_BUFFER_SIZE - 1);
> + tail = _rewind_tail(stream, tail, report_size);
>   }
>
> - if (OA_TAKEN(hw_tail, tail) > report_size &&
> + if (_oa_taken(stream, hw_tail, tail) > report_size &&
>   __ratelimit(&stream->perf->tail_pointer_race))
>   DRM_NOTE("unlanded report(s) head=0x%x "
>"tail=0x%x hw_tail=0x%x\n",
> @@ -547,8 +564,9 @@ static bool oa_buffer_check_unlocked(struct 
> i915_perf_stream *stream)
>   stream->oa_buffer.aging_timestamp = now;
>   }
>
> - pollin = OA_TAKEN(stream->oa_buffer.tail - gtt_offset,
> -   stream->oa_buffer.head - gtt_offset) >= report_size;
> + pollin = _oa_taken(stream,
> +stream->oa_buffer.tail,
> +stream->oa_buffer.head) >= report_size;
>
>   spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
>
> @@ -679,11 +697,9 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>   int 

Re: [Intel-gfx] [PATCH 4/6] drm/i915: Use GEN12 RPSTAT register

2022-09-14 Thread Dixit, Ashutosh
On Wed, 14 Sep 2022 02:56:26 -0700, Nilawar, Badal wrote:
>
> On 13-09-2022 13:17, Tvrtko Ursulin wrote:
> >
> > On 13/09/2022 01:09, Dixit, Ashutosh wrote:
> >> On Mon, 12 Sep 2022 04:29:38 -0700, Nilawar, Badal wrote:
> >>>
> >>>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c
> >>>>> b/drivers/gpu/drm/i915/i915_pmu.c
> >>>>> index 958b37123bf1..a24704ec2c18 100644
> >>>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>>>> @@ -371,7 +371,6 @@ static void
> >>>>>    frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >>>>>    {
> >>>>> struct drm_i915_private *i915 = gt->i915;
> >>>>> -    struct intel_uncore *uncore = gt->uncore;
> >>>>> struct i915_pmu *pmu = &i915->pmu;
> >>>>> struct intel_rps *rps = >->rps;
> >>>>>
> >>>>> @@ -394,7 +393,7 @@ frequency_sample(struct intel_gt *gt, unsigned
> >>>>> int period_ns)
> >>>>>  * case we assume the system is running at the intended
> >>>>>  * frequency. Fortunately, the read should rarely fail!
> >>>>>  */
> >>>>> -    val = intel_uncore_read_fw(uncore, GEN6_RPSTAT1);
> >>>>> +    val = intel_rps_read_rpstat(rps);
> >>>>
> >>>> Hmm, we got rid of _fw which the comment above refers to. Maybe we
> >>>> need a
> >>>> fw flag to intel_rps_read_rpstat?
> >>>
> >>> Above function before reading rpstat it checks if gt is awake.
> >>
> >> Ok, so you are referring to intel_gt_pm_get_if_awake check in
> >> frequency_sample.
> >>
> >>> So when gt is awake shouldn't matter if we read GEN6_RPSTAT1 with
> >>> forcewake.In that case we can remove above comment.  Let me know your
> >>> thoughts on this.
> >>
> >> I am not entirely sure about this. For example in c1c82d267ae8
> >> intel_uncore_read_fw was introduced with the same
> >> intel_gt_pm_get_if_awake
> >> check. So this would mean even if gt is awake not taking forcewake makes
> >> a
> >> difference. The same code pattern was retained in b66ecd0438bf. Maybe
> >> it's
> >> because there are no locks?
> >
> > Its about power. As c1c82d267ae8 ("drm/i915/pmu: Cheat when reading the
> > actual frequency to avoid fw") explains the _fw variant is to avoid
> > preventing RC6, and so increased GPU power draw, just because someone has
> > PMU open. (Because of the 200Hz sampling timer that is needed for PMU
> > frequency reporting.)
> >
> >> Under the circumstances I think we could do one of two things:
> >> 1. If we want to drop _fw, we should do it as a separate patch with its
> >> own
> >>     justification so it can be reviewed separately.
> >> 2. Otherwise as I mentioned we should retain the _fw and add a fw flag to
> >>     intel_rps_read_rpstat.
> >
> > Agreed. Or instead of the flag, the usual pattern of having
> > intel_rps_read_rpstat_fw and make intel_rps_read_rpsstat get the
> > forcewake.
> >
> > Also, may I ask, this patch is in the MTL enablement series but the
> > commit message and patch content seem like it is fixing a wider Gen12
> > issue? What is the extent of incorrect behaviour without it? Should it be
> > tagged for stable for first Tigerlake supporting kernel?
>
> GEN6_RPSTAT1(0xa01c) and GEN12_RPSTAT1(0x1381b4) both are supported by
> gen12 and above. The difference between two is GEN6_RPSTAT1 falls under
> RENDER forcewake domain and GEN12_RPSTAT1 does not require forcewake to
> access. GEN12_RPSTAT1 is punit register and when GT is in RC6 it will give
> frequency as 0.

Correct, so no changes needed for stable kernels. But going forward Badal
is proposing (which I sort of agree with but may need some discussion) that
we change i915 behavior to return 0 freq (instead of cur_freq or RPn) when
GT is idle or in RC6 (so we don't take forcewake to read freq when GT is in
RC6).

> Reason for clubbing this patch with MTL series is due to common function
> intel_rps_read_rpstat. I think I should send this patch in separate series.

Agree!

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 08/19] drm/i915/perf: Move gt-specific data from i915->perf to gt->perf

2022-09-14 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:44 -0700, Umesh Nerlige Ramappa wrote:
>
> Make perf part of gt as the OAG buffer is specific to a gt. The refactor
> eventually simplifies programming the right OA buffer and the right HW
> registers when supporting multiple gts.

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 09/19] drm/i915/perf: Replace gt->perf.lock with stream->lock for file ops

2022-09-14 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:45 -0700, Umesh Nerlige Ramappa wrote:
>
> With multi-gt, user can access multiple OA buffers concurrently. Use
> stream->lock instead of gt->perf.lock to serialize file operations.

Ok, will come in handy for multiple streams per gt:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 06/19] drm/i915/perf: Use helpers to process reports w.r.t. OA buffer size

2022-09-14 Thread Dixit, Ashutosh
On Wed, 14 Sep 2022 11:19:30 -0700, Umesh Nerlige Ramappa wrote:
>
> On Wed, Sep 14, 2022 at 09:04:10AM -0700, Dixit, Ashutosh wrote:
> > On Tue, 23 Aug 2022 13:41:42 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >
> > Hi Umesh,
> >
> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> >> b/drivers/gpu/drm/i915/i915_perf.c
> >> index 6fc4f0d8fc5a..bbf1c574f393 100644
> >> --- a/drivers/gpu/drm/i915/i915_perf.c
> >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >> @@ -385,6 +385,21 @@ static struct ctl_table_header *sysctl_header;
> >>
> >>  static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer 
> >> *hrtimer);
> >>
> >> +static u32 _oa_taken(struct i915_perf_stream * stream, u32 tail, u32 head)
> >
> > nit: no space between * and stream.
> >
> >> +{
> >> +  u32 size = stream->oa_buffer.vma->size;
> >> +
> >> +  return tail >= head ? tail - head : size - (head - tail);
> >> +}
> >
> > If we are doing this we should probably eliminate references to OA_TAKEN
> > which serves an identical purpose (I think there is one remaining
> > reference) and also delete OA_TAKEN #define.
> >
> >> +
> >> +static u32 _rewind_tail(struct i915_perf_stream * stream, u32 
> >> relative_hw_tail,
> >> +  u32 rewind_delta)
> >> +{
> >> +  return rewind_delta > relative_hw_tail ?
> >> + stream->oa_buffer.vma->size - (rewind_delta - relative_hw_tail) :
> >> + relative_hw_tail - rewind_delta;
> >> +}
> >
> > Also are we really saying here that we are supporting non-power-of-2 OA
> > buffer sizes? Because if we stayed with power-of-2 sizes the expression
> > above are nice and elegant and actually closer to the previous code being
> > changed in this patch. For example:
> >
> > #include 
> >
> > static u32 _oa_taken(struct i915_perf_stream *stream, u32 tail, u32 head)
> > {
> > return CIRC_CNT(tail, head, stream->oa_buffer.vma->size);
> > }
> >
> > static u32 _rewind_tail(struct i915_perf_stream *stream, u32 
> > relative_hw_tail,
> > u32 rewind_delta)
> > {
> > return CIRC_CNT(relative_hw_tail, rewind_delta, 
> > stream->oa_buffer.vma->size);
> > }
> >
> > Note that for power-of-2 sizes the two functions above are identical but we
> > should keep them separate for clarity (as is done in the patch) since they
> > are serving two different functions in the OA code.
> >
> > Also another assumption in the code seems to be:
> >
> > stream->oa_buffer.vma->size == OA_BUFFER_SIZE
> >
> > which I am pretty sure will not hold for arbitrary non-power-of-2 OA buffer
> > sizes? So we might as well stick with power-of-2 sizes and change later in
> > a separate patch only if needed?
>
> Most changes here are related to the OA buffer size issue and that is
> specific to xehpsdv where the size is not a power of 2. I am thinking of
> dropping these changes in the next revision since DG2 is fixed and OA
> buffer sizes are power of 2.

In the code stream->oa_buffer.vma->size and OA_BUFFER_SIZE are both used,
if we want to clean that up and only use stream->oa_buffer.vma->size, we
could still do soemthing like I suggested with just power-of-2 sizes and
keep this patch. If we ever have to support non-power-of-2 sizes in the
future we'll just need to change _oa_taken and _rewind_tail
functions. Anyway your call.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 11/19] drm/i915/perf: Store a pointer to oa_format in oa_buffer

2022-09-14 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:47 -0700, Umesh Nerlige Ramappa wrote:
>
> @@ -3184,15 +3184,12 @@ static int i915_oa_stream_init(struct 
> i915_perf_stream *stream,
>   stream->sample_flags = props->sample_flags;
>   stream->sample_size += format_size;
>
> - stream->oa_buffer.format_size = format_size;
> - if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format_size == 0))
> + stream->oa_buffer.format = &perf->oa_formats[props->oa_format];
> + if (drm_WARN_ON(&i915->drm, stream->oa_buffer.format->size == 0))
>   return -EINVAL;

I would also move these 3 lines before the two lines on the top, eliminate
the format_size variable and assignment and just use
stream->oa_buffer.format->size. Otherwise:

Reviewed-by: Ashutosh Dixit 

>   stream->hold_preemption = props->hold_preemption;
>
> - stream->oa_buffer.format =
> - perf->oa_formats[props->oa_format].format;
> -
>   stream->periodic = props->oa_periodic;
>   if (stream->periodic)
>   stream->period_exponent = props->oa_period_exponent;
> diff --git a/drivers/gpu/drm/i915/i915_perf_types.h 
> b/drivers/gpu/drm/i915/i915_perf_types.h
> index dc9bfd8086cf..e0c96b44eda8 100644
> --- a/drivers/gpu/drm/i915/i915_perf_types.h
> +++ b/drivers/gpu/drm/i915/i915_perf_types.h
> @@ -250,11 +250,10 @@ struct i915_perf_stream {
>* @oa_buffer: State of the OA buffer.
>*/
>   struct {
> + const struct i915_oa_format *format;
>   struct i915_vma *vma;
>   u8 *vaddr;
>   u32 last_ctx_id;
> - int format;
> - int format_size;
>   int size_exponent;
>


Re: [Intel-gfx] [PATCH 02/19] drm/i915/perf: Add OA formats for DG2

2022-09-14 Thread Dixit, Ashutosh
On Wed, 14 Sep 2022 13:54:34 -0700, Umesh Nerlige Ramappa wrote:
>
> On Tue, Sep 13, 2022 at 08:40:22AM -0700, Dixit, Ashutosh wrote:
> > On Tue, 23 Aug 2022 13:41:38 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >> Add new OA formats for DG2.
> >
> > Should we change the patch title and commit message a bit to 'Add OAR and
> > OAG formats for DG2'?
>
> Hmm, I assumed OAR was also part of TGL, but looks like it's not. I can
> change the title as suggested.

By 'Add OAR and OAG formats for DG2' I meant we are only adding OAR and OAG
formats and not including other DG2 formats ;)


Re: [Intel-gfx] [PATCH 12/19] drm/i915/perf: Parse 64bit report header formats correctly

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:48 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> @@ -740,23 +802,19 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>   u8 *report = oa_buf_base + head;
>   u32 *report32 = (void *)report;
>   u32 ctx_id;
> - u32 reason;
> + u64 reason;
>
>   /*
>* The reason field includes flags identifying what
>* triggered this specific report (mostly timer
>* triggered or e.g. due to a context switch).
>*
> -  * This field is never expected to be zero so we can
> -  * check that the report isn't invalid before copying
> -  * it to userspace...
> +  * In MMIO triggered reports, some platforms do not set the
> +  * reason bit in this field and it is valid to have a reason
> +  * field of zero.
>*/
> - reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
> -   (GRAPHICS_VER(stream->perf->i915) == 12 ?
> -OAREPORT_REASON_MASK_EXTENDED :
> -OAREPORT_REASON_MASK));
> -
> - ctx_id = report32[2] & stream->specific_ctx_id_mask;
> + reason = oa_report_reason(stream, report);
> + ctx_id = oa_context_id(stream, report32);
>
>   /*
>* Squash whatever is in the CTX_ID field if it's marked as
> @@ -766,9 +824,10 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>* Note: that we don't clear the valid_ctx_bit so userspace can
>* understand that the ID has been squashed by the kernel.
>*/
> - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
> - GRAPHICS_VER(stream->perf->i915) <= 11)
> - ctx_id = report32[2] = INVALID_CTX_ID;
> + if (oa_report_ctx_invalid(stream, report)) {
> + ctx_id = INVALID_CTX_ID;
> + oa_context_id_squash(stream, report32);
> + }
>
>   /*
>* NB: For Gen 8 the OA unit no longer supports clock gating
> @@ -812,7 +871,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
> *stream,
>*/
>   if (stream->ctx &&
>   stream->specific_ctx_id != ctx_id) {
> - report32[2] = INVALID_CTX_ID;
> + oa_context_id_squash(stream, report32);
>   }
>
>   ret = append_oa_sample(stream, buf, count, offset,
> @@ -824,11 +883,11 @@ static int gen8_append_oa_reports(struct 
> i915_perf_stream *stream,
>   }
>
>   /*
> -  * Clear out the first 2 dword as a mean to detect unlanded
> +  * Clear out the report id and timestamp as a means to detect 
> unlanded
>* reports.
>*/
> - report32[0] = 0;
> - report32[1] = 0;
> + oa_report_id_clear(stream, report32);
> + oa_timestamp_clear(stream, report32);

Because we now have these new functions, why do we now need two pointers
report and report32 pointing to the same location? I think we can just have
a single 'void *report' which we can pass into all these functions,
correct?

With this change, this is:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 13/19] drm/i915/perf: Add Wa_16010703925:dg2

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:49 -0700, Umesh Nerlige Ramappa wrote:
>
> On DG2 A0, the OAR report format is buggy. Workaround is to not use it
> for A0. For A0, remove the OAR format from the bitmask of supported
> formats.

Are we going to support A0 upstream? If we are this is:

Reviewed-by: Ashutosh Dixit 

>
> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 167e7355980a..a28f07923d8f 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -4741,6 +4741,11 @@ static void oa_init_supported_formats(struct i915_perf 
> *perf)
>   default:
>   MISSING_CASE(platform);
>   }
> +
> + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0)) {
> + /* Wa_16010703925:dg2 */
> + clear_bit(I915_OAR_FORMAT_A36u64_B8_C8, perf->format_mask);
> + }
>  }
>
>  static void i915_perf_init_info(struct drm_i915_private *i915)
> --
> 2.25.1
>


Re: [Intel-gfx] [PATCH 14/19] drm/i915/perf: Add Wa_1608133521:dg2

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:50 -0700, Umesh Nerlige Ramappa wrote:
>
> DG2 introduces 64 bit counters and OA reports that have 64 bit values
> for fields in the report header - report_id, timestamp, context_id and
> gpu ticks. i915 uses report_id, timestamp and context_id to check for
> valid reports.
>
> In some DG2 variants, only the lower dwords for timestamp, report_id and
> context_id are accessible. Add workaround for such reports.

Once again, if we are productizing A-step or it is going to be in CI
upstream, this is:

Reviewed-by: Ashutosh Dixit 

> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index a28f07923d8f..a858ce57e465 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -310,7 +310,7 @@ static u32 i915_oa_max_sample_rate = 10;
>   * be used as a mask to align the OA tail pointer. In some of the
>   * formats, R is used to denote reserved field.
>   */
> -static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
> +static struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
>   [I915_OA_FORMAT_A13]= { 0, 64 },
>   [I915_OA_FORMAT_A29]= { 1, 128 },
>   [I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
> @@ -4746,6 +4746,13 @@ static void oa_init_supported_formats(struct i915_perf 
> *perf)
>   /* Wa_16010703925:dg2 */
>   clear_bit(I915_OAR_FORMAT_A36u64_B8_C8, perf->format_mask);
>   }
> +
> + if (IS_DG2_GRAPHICS_STEP(i915, G10, STEP_A0, STEP_B0) ||
> + IS_DG2_GRAPHICS_STEP(i915, G11, STEP_A0, STEP_FOREVER)) {
> + /* Wa_1608133521:dg2 */
> + oa_formats[I915_OAR_FORMAT_A36u64_B8_C8].header = HDR_32_BIT;
> + oa_formats[I915_OA_FORMAT_A38u64_R2u64_B8_C8].header = 
> HDR_32_BIT;
> + }
>  }
>
>  static void i915_perf_init_info(struct drm_i915_private *i915)
> --
> 2.25.1
>


Re: [Intel-gfx] [PATCH 15/19] drm/i915/perf: Add Wa_1508761755:dg2

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:51 -0700, Umesh Nerlige Ramappa wrote:
>
> Disable Clock gating in EU when gathering the events so that EU events
> are not lost.

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 16/19] drm/i915/perf: Apply Wa_18013179988

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:52 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> OA reports in the OA buffer contain an OA timestamp field that helps
> user calculate delta between 2 OA reports. The calculation relies on the
> CS timestamp frequency to convert the timestamp value to nanoseconds.
> The CS timestamp frequency is a function of the CTC_SHIFT value in
> RPM_CONFIG0.
>
> In DG2, OA unit assumes that the CTC_SHIFT is 3, instead of using the
> actual value from RPM_CONFIG0. At the user level, this results in an
> error in calculating delta between 2 OA reports since the OA timestamp
> is not shifted in the same manner as CS timestamp.
>
> To resolve this, return actual OA timestamp frequency to the user in
> i915_getparam_ioctl.

Rather than exposing actual OA timestamp frequency to userspace (with the
corresponding uapi change, specially if it's only DG2 and not all future
products) questions about a couple of other options:

Option 1. Can we set CTC_SHIFT in RPM_CONFIG0 to 3, so change GT freq to be the
  same as OA freq :-)

   The HSD seems to mention this:
   Is setting CTC SHIFT to 0b11 on driver init an acceptable W/A?
   Note: Changing the shift setting on live driver may break apps that are
   currently running (including desktop manager).

Option 2. Is it possible to correct the timestamps in OA report headers to
  compensate for the difference between OA and GT frequencies (say when
  copying OA data to userspace)?

  Though not sure if this is preferable to having userspace do this.

A couple of minor optional nits on that patch below too.

> +u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
> +{
> + /* Wa_18013179988:dg2 */
> + if (IS_DG2(i915)) {
> + intel_wakeref_t wakeref;
> + u32 reg, shift;
> +
> + with_intel_runtime_pm(to_gt(i915)->uncore->rpm, wakeref)
> + reg = intel_uncore_read(to_gt(i915)->uncore, 
> RPM_CONFIG0);
> +
> + shift = (reg & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >>
> +  GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT;

This can be:
shift = 
REG_FIELD_GET(GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);

>  static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent)
>  {
> - return intel_gt_clock_interval_to_ns(to_gt(perf->i915),
> -  2ULL << exponent);
> + u64 nom = (2ULL << exponent) * NSEC_PER_SEC;
> + u32 den = i915_perf_oa_timestamp_frequency(perf->i915);
> +
> + return div_u64(nom + den - 1, den);

div_u64_roundup?

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 17/19] drm/i915/perf: Save/restore EU flex counters across reset

2022-09-15 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:53 -0700, Umesh Nerlige Ramappa wrote:
>
> If a drm client is killed, then hw contexts used by the client are reset
> immediately. This reset clears the EU flex counter configuration. If an
> OA use case is running in parallel, it would start seeing zeroed eu
> counter values following the reset even if the drm client is restarted.
> Save/restore the EU flex counter config so that the EU counters can be
> monitored continuously across resets.

Reviewed-by: Ashutosh Dixit 

Not sure if this needs to be done for non-GuC (execlists) too? Anyway
that's a later patch.

> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 8 
>  1 file changed, 8 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> index 74cbe8eaf531..3e152219fcb2 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
> @@ -375,6 +375,14 @@ static int guc_mmio_regset_init(struct temp_regset 
> *regset,
>   for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
>   ret |= GUC_MMIO_REG_ADD(gt, regset, GEN9_LNCFCMOCS(i), false);
>
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL0, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL1, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL2, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL3, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL4, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL5, false);
> + ret |= GUC_MMIO_REG_ADD(gt, regset, EU_PERF_CNTL6, false);
> +
>   return ret ? -1 : 0;
>  }
>
> --
> 2.25.1
>


Re: [Intel-gfx] [PATCH 16/19] drm/i915/perf: Apply Wa_18013179988

2022-09-16 Thread Dixit, Ashutosh
On Thu, 15 Sep 2022 22:16:30 -0700, Dixit, Ashutosh wrote:
>
> On Tue, 23 Aug 2022 13:41:52 -0700, Umesh Nerlige Ramappa wrote:
> >
>
> Hi Umesh,
>
> > OA reports in the OA buffer contain an OA timestamp field that helps
> > user calculate delta between 2 OA reports. The calculation relies on the
> > CS timestamp frequency to convert the timestamp value to nanoseconds.
> > The CS timestamp frequency is a function of the CTC_SHIFT value in
> > RPM_CONFIG0.
> >
> > In DG2, OA unit assumes that the CTC_SHIFT is 3, instead of using the
> > actual value from RPM_CONFIG0. At the user level, this results in an
> > error in calculating delta between 2 OA reports since the OA timestamp
> > is not shifted in the same manner as CS timestamp.
> >
> > To resolve this, return actual OA timestamp frequency to the user in
> > i915_getparam_ioctl.
>
> Rather than exposing actual OA timestamp frequency to userspace (with the
> corresponding uapi change, specially if it's only DG2 and not all future
> products) questions about a couple of other options:
>
> Option 1. Can we set CTC_SHIFT in RPM_CONFIG0 to 3, so change GT freq to be 
> the
>   same as OA freq :-)
>
>The HSD seems to mention this:
>Is setting CTC SHIFT to 0b11 on driver init an acceptable W/A?
>Note: Changing the shift setting on live driver may break apps that are
>currently running (including desktop manager).
>
> Option 2. Is it possible to correct the timestamps in OA report headers to
>   compensate for the difference between OA and GT frequencies (say 
> when
>   copying OA data to userspace)?
>
> Though not sure if this is preferable to having userspace do this.

Also do we need input from userland on this patch? UMD's might need to
assess the impact of having different GT and OA frequencies at their end
since they consume OA data?

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 16/19] drm/i915/perf: Apply Wa_18013179988

2022-09-16 Thread Dixit, Ashutosh
On Fri, 16 Sep 2022 11:56:04 -0700, Umesh Nerlige Ramappa wrote:
>
> On Thu, Sep 15, 2022 at 10:16:30PM -0700, Dixit, Ashutosh wrote:
> > On Tue, 23 Aug 2022 13:41:52 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >
> > Hi Umesh,
> >
> >> OA reports in the OA buffer contain an OA timestamp field that helps
> >> user calculate delta between 2 OA reports. The calculation relies on the
> >> CS timestamp frequency to convert the timestamp value to nanoseconds.
> >> The CS timestamp frequency is a function of the CTC_SHIFT value in
> >> RPM_CONFIG0.
> >>
> >> In DG2, OA unit assumes that the CTC_SHIFT is 3, instead of using the
> >> actual value from RPM_CONFIG0. At the user level, this results in an
> >> error in calculating delta between 2 OA reports since the OA timestamp
> >> is not shifted in the same manner as CS timestamp.
> >>
> >> To resolve this, return actual OA timestamp frequency to the user in
> >> i915_getparam_ioctl.
> >
> > Rather than exposing actual OA timestamp frequency to userspace (with the
> > corresponding uapi change, specially if it's only DG2 and not all future
> > products) questions about a couple of other options:
> >
> > Option 1. Can we set CTC_SHIFT in RPM_CONFIG0 to 3, so change GT freq to be 
> > the
> >  same as OA freq :-)
> >
> >   The HSD seems to mention this:
> >   Is setting CTC SHIFT to 0b11 on driver init an acceptable W/A?
> >   Note: Changing the shift setting on live driver may break apps that are
> >   currently running (including desktop manager).
> >
> > Option 2. Is it possible to correct the timestamps in OA report headers to
> >  compensate for the difference between OA and GT frequencies (say 
> > when
> >  copying OA data to userspace)?
> >
> >   Though not sure if this is preferable to having userspace do this.
>
> It does affect other platforms too. There's no guarantee on what the
> CTC_SHIFT value would be for different platforms, so user would have to at
> least query that somehow (maybe from i915). It's simpler for user to use
> the exported OA frequency since it is also backwards compatible.

Is Option 2 above feasible since it would stop propagating the change to
various UMD's?

> https://patchwork.freedesktop.org/patch/498917/?series=107633&rev=3 is
> consumed by GPUvis. That reminds me, I should include the UMD links for the
> patches with uapi changes.

I was thinking more about UMD's which analayze OA data and who till now are
probably assuming OA freq == GT freq and will now have to drop that
assumption. So not sure how widespread would be these changes in
the (multiple different?) UMD(s).

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 16/19] drm/i915/perf: Apply Wa_18013179988

2022-09-16 Thread Dixit, Ashutosh
On Fri, 16 Sep 2022 13:25:17 -0700, Umesh Nerlige Ramappa wrote:
>
> On Fri, Sep 16, 2022 at 12:57:19PM -0700, Dixit, Ashutosh wrote:
> > On Fri, 16 Sep 2022 11:56:04 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >> On Thu, Sep 15, 2022 at 10:16:30PM -0700, Dixit, Ashutosh wrote:
> >> > On Tue, 23 Aug 2022 13:41:52 -0700, Umesh Nerlige Ramappa wrote:
> >> >>
> >> >
> >> > Hi Umesh,
> >> >
> >> >> OA reports in the OA buffer contain an OA timestamp field that helps
> >> >> user calculate delta between 2 OA reports. The calculation relies on the
> >> >> CS timestamp frequency to convert the timestamp value to nanoseconds.
> >> >> The CS timestamp frequency is a function of the CTC_SHIFT value in
> >> >> RPM_CONFIG0.
> >> >>
> >> >> In DG2, OA unit assumes that the CTC_SHIFT is 3, instead of using the
> >> >> actual value from RPM_CONFIG0. At the user level, this results in an
> >> >> error in calculating delta between 2 OA reports since the OA timestamp
> >> >> is not shifted in the same manner as CS timestamp.
> >> >>
> >> >> To resolve this, return actual OA timestamp frequency to the user in
> >> >> i915_getparam_ioctl.
> >> >
> >> > Rather than exposing actual OA timestamp frequency to userspace (with the
> >> > corresponding uapi change, specially if it's only DG2 and not all future
> >> > products) questions about a couple of other options:
> >> >
> >> > Option 1. Can we set CTC_SHIFT in RPM_CONFIG0 to 3, so change GT freq to 
> >> > be the
> >> >  same as OA freq :-)
> >> >
> >> >   The HSD seems to mention this:
> >> >   Is setting CTC SHIFT to 0b11 on driver init an acceptable W/A?
> >> >   Note: Changing the shift setting on live driver may break apps that are
> >> >   currently running (including desktop manager).
> >> >
> >> > Option 2. Is it possible to correct the timestamps in OA report headers 
> >> > to
> >> >  compensate for the difference between OA and GT frequencies 
> >> > (say when
> >> >  copying OA data to userspace)?
> >> >
> >> >Though not sure if this is preferable to having userspace do this.
> >>
> >> It does affect other platforms too. There's no guarantee on what the
> >> CTC_SHIFT value would be for different platforms, so user would have to at
> >> least query that somehow (maybe from i915). It's simpler for user to use
> >> the exported OA frequency since it is also backwards compatible.
> >
> > Is Option 2 above feasible since it would stop propagating the change to
> > various UMD's?
>
> Hmm, there is logic today that squashes context ids when doing oa buffer
> filtering, but it does that on selective reports (i.e. if a gem_context is
> passed).
>
> For this issue: for a 16MB OA buffer with 256 byte reports, that would be
> an additional write of 262144 in the kmd (to smem). For 20us sampled OA
> reports, it would be approx. 195 KB/s. Shouldn't be too much. Only 2
> concerns:
>
> - the mmapped use case may break, but I don't see that being upstreamed.
> We may have divergent solutions for upstream and internal.
> - blocking/polling tests in IGT will be sensitive to this change on some
> platforms and may need to be bolstered.

If this correction/compensation in the kernel works out, even for internal
too we could do the following:

* For non-mmaped case, do the correction in the kernel and expose OA freq
  == GT freq (in the getparam ioctl)
* For mmaped case expose the actual OA freq (!= GT freq)

This will restrict the divergence only to the mmaped case (which we will
probably not be able to upstream).

>
> I will give it a shot and get back,
>
> Thanks,
> Umesh
>
> >
> >> https://patchwork.freedesktop.org/patch/498917/?series=107633&rev=3 is
> >> consumed by GPUvis. That reminds me, I should include the UMD links for the
> >> patches with uapi changes.
> >
> > I was thinking more about UMD's which analayze OA data and who till now are
> > probably assuming OA freq == GT freq and will now have to drop that
> > assumption. So not sure how widespread would be these changes in
> > the (multiple different?) UMD(s).
> >
> > Thanks.
> > --
> > Ashutosh


Re: [Intel-gfx] [PATCH 18/19] drm/i915/guc: Support OA when Wa_16011777198 is enabled

2022-09-16 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:54 -0700, Umesh Nerlige Ramappa wrote:
>
> From: Vinay Belgaumkar 
>
> There is a w/a to reset RCS/CCS before it goes into RC6. This breaks
> OA. Fix it by disabling RC6.

Need to mention DG2 in the commit message?

> Signed-off-by: Vinay Belgaumkar 
> ---
>  .../drm/i915/gt/uc/abi/guc_actions_slpc_abi.h |  9 
>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c   | 45 +++
>  drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h   |  2 +
>  drivers/gpu/drm/i915/i915_perf.c  | 29 
>  4 files changed, 85 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h 
> b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
> index 4c840a2639dc..811add10c30d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
> +++ b/drivers/gpu/drm/i915/gt/uc/abi/guc_actions_slpc_abi.h
> @@ -128,6 +128,15 @@ enum slpc_media_ratio_mode {
>   SLPC_MEDIA_RATIO_MODE_FIXED_ONE_TO_TWO = 2,
>  };
>
> +enum slpc_gucrc_mode {
> + SLPC_GUCRC_MODE_HW = 0,
> + SLPC_GUCRC_MODE_GUCRC_NO_RC6 = 1,
> + SLPC_GUCRC_MODE_GUCRC_STATIC_TIMEOUT = 2,
> + SLPC_GUCRC_MODE_GUCRC_DYNAMIC_HYSTERESIS = 3,
> +
> + SLPC_GUCRC_MODE_MAX,
> +};
> +
>  enum slpc_event_id {
>   SLPC_EVENT_RESET = 0,
>   SLPC_EVENT_SHUTDOWN = 1,
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> index e1fa1f32f29e..23989f5452a7 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
> @@ -642,6 +642,51 @@ static void slpc_get_rp_values(struct intel_guc_slpc 
> *slpc)
>   slpc->boost_freq = slpc->rp0_freq;
>  }
>
> +/**
> + * intel_guc_slpc_override_gucrc_mode() - override GUCRC mode
> + * @slpc: pointer to intel_guc_slpc.
> + * @mode: new value of the mode.
> + *
> + * This function will override the GUCRC mode.
> + *
> + * Return: 0 on success, non-zero error code on failure.
> + */
> +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 mode)
> +{
> + int ret;
> + struct drm_i915_private *i915 = slpc_to_i915(slpc);
> + intel_wakeref_t wakeref;
> +
> + if (mode >= SLPC_GUCRC_MODE_MAX)
> + return -EINVAL;
> +
> + wakeref = intel_runtime_pm_get(&i915->runtime_pm);
> +
> + ret = slpc_set_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
> + if (ret)
> + drm_err(&i915->drm,
> + "Override gucrc mode %d failed %d\n",
> + mode, ret);
> +
> + intel_runtime_pm_put(&i915->runtime_pm, wakeref);

nit but I think let's switch to with_intel_runtime_pm() since all other
slpc functions use that.

> +
> + return ret;
> +}
> +
> +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc)
> +{
> + struct drm_i915_private *i915 = slpc_to_i915(slpc);
> + int ret = 0;
> +
> + ret = slpc_unset_param(slpc, SLPC_PARAM_PWRGATE_RC_MODE);

Looks like slpc_unset_param() is not present so that needs to be added to
the patch too, otherwise probably doesn't even compile.

> + if (ret)
> + drm_err(&i915->drm,
> + "Unsetting gucrc mode failed %d\n",
> + ret);
> +
> + return ret;
> +}
> +
>  /*
>   * intel_guc_slpc_enable() - Start SLPC
>   * @slpc: pointer to intel_guc_slpc.
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
> index 82a98f78f96c..ccf483730d9d 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.h
> @@ -42,5 +42,7 @@ int intel_guc_slpc_set_media_ratio_mode(struct 
> intel_guc_slpc *slpc, u32 val);
>  void intel_guc_pm_intrmsk_enable(struct intel_gt *gt);
>  void intel_guc_slpc_boost(struct intel_guc_slpc *slpc);
>  void intel_guc_slpc_dec_waiters(struct intel_guc_slpc *slpc);
> +int intel_guc_slpc_unset_gucrc_mode(struct intel_guc_slpc *slpc);
> +int intel_guc_slpc_override_gucrc_mode(struct intel_guc_slpc *slpc, u32 
> mode);
>
>  #endif
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index 132c2ce8b33b..ce1b6ad4d107 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -208,6 +208,7 @@
>  #include "gt/intel_lrc.h"
>  #include "gt/intel_lrc_reg.h"
>  #include "gt/intel_ring.h"
> +#include "gt/uc/intel_guc_slpc.h"
>
>  #include "i915_drv.h"
>  #include "i915_file_private.h"
> @@ -1651,6 +1652,16 @@ static void i915_oa_stream_destroy(struct 
> i915_perf_stream *stream)
>
>   free_oa_buffer(stream);
>
> + /*
> +  * Wa_16011777198:dg2: Unset the override of GUCRC mode to enable rc6.
> +  */
> + if (intel_guc_slpc_is_used(>->uc.guc) &&
> + intel_uc_uses_guc_rc(>->uc) &&
> + (IS_DG2_GRAPHICS_STEP(gt->i915, G10, STEP_A0, STEP_C0) ||
> +  IS_DG2_GRAPHICS_STEP(gt->i915, G11, STEP_A0, STEP_B0)))

Do these steppings need to be tweaked,

Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for Introduce struct cdclk_step

2022-09-16 Thread Dixit, Ashutosh
On Fri, 16 Sep 2022 18:35:13 -0700, Patchwork wrote:
>

Hi Lakshmi,

> Series:  Introduce struct cdclk_step
> URL: https://patchwork.freedesktop.org/series/108685/
> State:   failure
> Details: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_108685v1/index.html
>
> CI Bug Log - changes from CI_DRM_12148 -> Patchwork_108685v1
>
> Summary
>
> FAILURE
>
> Serious unknown changes coming with Patchwork_108685v1 absolutely need to be
> verified manually.
>
> If you think the reported changes have nothing to do with the changes
> introduced in Patchwork_108685v1, please notify your bug team to allow them
> to document this new failure mode, which will reduce false positives in CI.
>
> External URL: 
> https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_108685v1/index.html
>
> Participating hosts (43 -> 41)
>
> Additional (2): fi-icl-u2 fi-pnv-d510
> Missing (4): fi-ctg-p8600 fi-hsw-4200u fi-bdw-samus bat-dg1-5
>
> Possible new issues
>
> Here are the unknown changes that may have been introduced in 
> Patchwork_108685v1:
>
> IGT changes
>
> Possible regressions
>
>   • igt@debugfs_test@read_all_entries:
>   □ fi-pnv-d510: NOTRUN -> INCOMPLETE

This failure is unrelated and needs a new bug. Seems to be caused by:

fe5979665f640 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs")

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915/debugfs: Fix perf_limit_reasons debugfs

2022-09-18 Thread Dixit, Ashutosh
On Fri, 16 Sep 2022 20:15:01 -0700, Ashutosh Dixit wrote:
>
> Register GT0_PERF_LIMIT_REASONS (0x1381a8) is available only for Gen11+.

PLEASE IGNORE THIS PATCH. I will submit a different patch for this issue.

Thanks.
--
Ashutosh

> On Gen < 5 igt@debugfs_test@read_all_entries results in the following oops:
>
> <1> [88.829420] BUG: unable to handle page fault for address: c9bb81a8
> <1> [88.829438] #PF: supervisor read access in kernel mode
> <1> [88.829447] #PF: error_code(0x) - not-present page
>
> Bspec: 20008
> Fixes: fe5979665f64 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs")
> Signed-off-by: Ashutosh Dixit 
> ---
>  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 6 ++
>  1 file changed, 6 insertions(+)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> index 68310881a793..e7f057821cd0 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> @@ -660,6 +660,9 @@ static int perf_limit_reasons_get(void *data, u64 *val)
>   struct intel_gt *gt = data;
>   intel_wakeref_t wakeref;
>
> + if (GRAPHICS_VER(gt->i915) < 11)
> + return 0;
> +
>   with_intel_runtime_pm(gt->uncore->rpm, wakeref)
>   *val = intel_uncore_read(gt->uncore, 
> intel_gt_perf_limit_reasons_reg(gt));
>
> @@ -671,6 +674,9 @@ static int perf_limit_reasons_clear(void *data, u64 val)
>   struct intel_gt *gt = data;
>   intel_wakeref_t wakeref;
>
> + if (GRAPHICS_VER(gt->i915) < 11)
> + return 0;
> +
>   /*
>* Clear the upper 16 "log" bits, the lower 16 "status" bits are
>* read-only. The upper 16 "log" bits are identical to the lower 16
> --
> 2.34.1
>


Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for Introduce struct cdclk_step

2022-09-19 Thread Dixit, Ashutosh
On Sun, 18 Sep 2022 23:35:56 -0700, Vudum, Lakshminarayana wrote:
>

Hi Lakshmi,

> Filed a couple of issues and re-reported.
>
> This one Likely a regression?
> https://gitlab.freedesktop.org/drm/intel/-/issues/6864
> Few tests - dmesg-warn/dmesg-fail/incomplete -BUG: unable to handle page 
> fault for address .*, #PF: supervisor read access in kernel mode, RIP: 
> 0010:__list_add_valid, Call Trace: dma_fence_default_wait, 
> dma_fence_add_callback
>
> https://gitlab.freedesktop.org/drm/intel/-/issues/6863
> igt@debugfs_test@read_all_entries - incomplete - BUG: unable to handle page 
> fault for address: c9bb81a8, RIP: 0010:gen2_read32

Not sure about https://gitlab.freedesktop.org/drm/intel/-/issues/6864, I
didn't see it.

I was mentioning
https://gitlab.freedesktop.org/drm/intel/-/issues/6863. This is a
regression. I have already submitted a fix for it:

https://patchwork.freedesktop.org/series/108747/

Thanks.
--
Ashutosh

> Lakshmi.
>
> -Original Message-
> From: Dixit, Ashutosh 
> Sent: Friday, September 16, 2022 7:08 PM
> To: intel-gfx@lists.freedesktop.org; Vudum, Lakshminarayana 
> 
> Cc: Srivatsa, Anusha 
> Subject: Re: [Intel-gfx] ✗ Fi.CI.BAT: failure for Introduce struct cdclk_step
>
> On Fri, 16 Sep 2022 18:35:13 -0700, Patchwork wrote:
> >
>
> Hi Lakshmi,
>
> > Series:  Introduce struct cdclk_step
> > URL: https://patchwork.freedesktop.org/series/108685/
> > State:   failure
> > Details:
> > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_108685v1/index.html
> >
> > CI Bug Log - changes from CI_DRM_12148 -> Patchwork_108685v1
> >
> > Summary
> >
> > FAILURE
> >
> > Serious unknown changes coming with Patchwork_108685v1 absolutely need
> > to be verified manually.
> >
> > If you think the reported changes have nothing to do with the changes
> > introduced in Patchwork_108685v1, please notify your bug team to allow
> > them to document this new failure mode, which will reduce false positives 
> > in CI.
> >
> > External URL:
> > https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_108685v1/index.html
> >
> > Participating hosts (43 -> 41)
> >
> > Additional (2): fi-icl-u2 fi-pnv-d510
> > Missing (4): fi-ctg-p8600 fi-hsw-4200u fi-bdw-samus bat-dg1-5
> >
> > Possible new issues
> >
> > Here are the unknown changes that may have been introduced in 
> > Patchwork_108685v1:
> >
> > IGT changes
> >
> > Possible regressions
> >
> >   • igt@debugfs_test@read_all_entries:
> >   □ fi-pnv-d510: NOTRUN -> INCOMPLETE
>
> This failure is unrelated and needs a new bug. Seems to be caused by:
>
>   fe5979665f640 ("drm/i915/debugfs: Add perf_limit_reasons in debugfs")
>
> Thanks.
> --
> Ashutosh


Re: [Intel-gfx] [PATCH 03/19] drm/i915/perf: Fix noa wait predication for DG2

2022-09-19 Thread Dixit, Ashutosh
On Tue, 23 Aug 2022 13:41:39 -0700, Umesh Nerlige Ramappa wrote:
>
> Predication for batch buffer commands changed in XEHPSDV.
> MI_BATCH_BUFFER_START predicates based on MI_SET_PREDICATE_RESULT
> register. The MI_SET_PREDICATE_RESULT register can only be modified
> with MI_SET_PREDICATE command. When configured, the MI_SET_PREDICATE
> command sets MI_SET_PREDICATE_RESULT based on bit 0 of
> MI_PREDICATE_RESULT_2. Use this to configure predication in noa_wait.

Reviewed-by: Ashutosh Dixit 

> Signed-off-by: Umesh Nerlige Ramappa 
> ---
>  drivers/gpu/drm/i915/gt/intel_engine_regs.h |  1 +
>  drivers/gpu/drm/i915/i915_perf.c| 24 +
>  2 files changed, 21 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_regs.h 
> b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> index 889f0df3940b..25d23f3a4769 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_regs.h
> @@ -200,6 +200,7 @@
>  #define RING_CONTEXT_STATUS_PTR(base)_MMIO((base) + 0x3a0)
>  #define RING_CTX_TIMESTAMP(base) _MMIO((base) + 0x3a8) /* gen8+ 
> */
>  #define RING_PREDICATE_RESULT(base)  _MMIO((base) + 0x3b8)
> +#define MI_PREDICATE_RESULT_2_ENGINE(base)   _MMIO((base) + 0x3bc)
>  #define RING_FORCE_TO_NONPRIV(base, i)   _MMIO(((base) + 0x4D0) 
> + (i) * 4)
>  #define   RING_FORCE_TO_NONPRIV_DENY REG_BIT(30)
>  #define   RING_FORCE_TO_NONPRIV_ADDRESS_MASK REG_GENMASK(25, 2)
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index c8331b549d31..3526693d64fa 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -286,6 +286,7 @@ static u32 i915_perf_stream_paranoid = true;
>  #define OAREPORT_REASON_CTX_SWITCH (1<<3)
>  #define OAREPORT_REASON_CLK_RATIO  (1<<5)
>
> +#define HAS_MI_SET_PREDICATE(i915) (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 
> 50))
>
>  /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
>   *
> @@ -1766,6 +1767,9 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>   DELTA_TARGET,
>   N_CS_GPR
>   };
> + i915_reg_t mi_predicate_result = HAS_MI_SET_PREDICATE(i915) ?
> +   MI_PREDICATE_RESULT_2_ENGINE(base) :
> +   
> MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>
>   bo = i915_gem_object_create_internal(i915, 4096);
>   if (IS_ERR(bo)) {
> @@ -1803,7 +1807,7 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>   stream, cs, true /* save */, CS_GPR(i),
>   INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>   cs = save_restore_register(
> - stream, cs, true /* save */, 
> MI_PREDICATE_RESULT_1(RENDER_RING_BASE),
> + stream, cs, true /* save */, mi_predicate_result,
>   INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>
>   /* First timestamp snapshot location. */
> @@ -1857,7 +1861,10 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>*/
>   *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
>   *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
> - *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
> + *cs++ = i915_mmio_reg_offset(mi_predicate_result);
> +
> + if (HAS_MI_SET_PREDICATE(i915))
> + *cs++ = MI_SET_PREDICATE | 1;
>
>   /* Restart from the beginning if we had timestamps roll over. */
>   *cs++ = (GRAPHICS_VER(i915) < 8 ?
> @@ -1867,6 +1874,9 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>   *cs++ = i915_ggtt_offset(vma) + (ts0 - batch) * 4;
>   *cs++ = 0;
>
> + if (HAS_MI_SET_PREDICATE(i915))
> + *cs++ = MI_SET_PREDICATE;
> +
>   /*
>* Now add the diff between to previous timestamps and add it to :
>*  (((1 * << 64) - 1) - delay_ns)
> @@ -1894,7 +1904,10 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>*/
>   *cs++ = MI_LOAD_REGISTER_REG | (3 - 2);
>   *cs++ = i915_mmio_reg_offset(CS_GPR(JUMP_PREDICATE));
> - *cs++ = i915_mmio_reg_offset(MI_PREDICATE_RESULT_1(RENDER_RING_BASE));
> + *cs++ = i915_mmio_reg_offset(mi_predicate_result);
> +
> + if (HAS_MI_SET_PREDICATE(i915))
> + *cs++ = MI_SET_PREDICATE | 1;
>
>   /* Predicate the jump.  */
>   *cs++ = (GRAPHICS_VER(i915) < 8 ?
> @@ -1904,13 +1917,16 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
>   *cs++ = i915_ggtt_offset(vma) + (jump - batch) * 4;
>   *cs++ = 0;
>
> + if (HAS_MI_SET_PREDICATE(i915))
> + *cs++ = MI_SET_PREDICATE;
> +
>   /* Restore registers. */
>   for (i = 0; i < N_CS_GPR; i++)
>   cs = save_restore_register(
>   stream, cs, false /* restore */, CS_GPR(i),
>   INTEL_GT_SCRATCH_

Re: [Intel-gfx] [PATCH 16/19] drm/i915/perf: Apply Wa_18013179988

2022-09-19 Thread Dixit, Ashutosh
On Mon, 19 Sep 2022 14:21:07 -0700, Umesh Nerlige Ramappa wrote:
>
> On Fri, Sep 16, 2022 at 02:00:19PM -0700, Dixit, Ashutosh wrote:
> > On Fri, 16 Sep 2022 13:25:17 -0700, Umesh Nerlige Ramappa wrote:
> >>
> >> On Fri, Sep 16, 2022 at 12:57:19PM -0700, Dixit, Ashutosh wrote:
> >> > On Fri, 16 Sep 2022 11:56:04 -0700, Umesh Nerlige Ramappa wrote:
> >> >>
> >> >> On Thu, Sep 15, 2022 at 10:16:30PM -0700, Dixit, Ashutosh wrote:
> >> >> > On Tue, 23 Aug 2022 13:41:52 -0700, Umesh Nerlige Ramappa wrote:
> >> >> >>
> >> >> >
> >> >> > Hi Umesh,
> >> >> >
> >> >> >> OA reports in the OA buffer contain an OA timestamp field that helps
> >> >> >> user calculate delta between 2 OA reports. The calculation relies on 
> >> >> >> the
> >> >> >> CS timestamp frequency to convert the timestamp value to nanoseconds.
> >> >> >> The CS timestamp frequency is a function of the CTC_SHIFT value in
> >> >> >> RPM_CONFIG0.
> >> >> >>
> >> >> >> In DG2, OA unit assumes that the CTC_SHIFT is 3, instead of using the
> >> >> >> actual value from RPM_CONFIG0. At the user level, this results in an
> >> >> >> error in calculating delta between 2 OA reports since the OA 
> >> >> >> timestamp
> >> >> >> is not shifted in the same manner as CS timestamp.
> >> >> >>
> >> >> >> To resolve this, return actual OA timestamp frequency to the user in
> >> >> >> i915_getparam_ioctl.
> >> >> >
> >> >> > Rather than exposing actual OA timestamp frequency to userspace (with 
> >> >> > the
> >> >> > corresponding uapi change, specially if it's only DG2 and not all 
> >> >> > future
> >> >> > products) questions about a couple of other options:
> >> >> >
> >> >> > Option 1. Can we set CTC_SHIFT in RPM_CONFIG0 to 3, so change GT freq 
> >> >> > to be the
> >> >> >  same as OA freq :-)
> >> >> >
> >> >> >   The HSD seems to mention this:
> >> >> >   Is setting CTC SHIFT to 0b11 on driver init an acceptable W/A?
> >> >> >   Note: Changing the shift setting on live driver may break apps that 
> >> >> > are
> >> >> >   currently running (including desktop manager).
> >> >> >
> >> >> > Option 2. Is it possible to correct the timestamps in OA report 
> >> >> > headers to
> >> >> >  compensate for the difference between OA and GT frequencies 
> >> >> > (say when
> >> >> >  copying OA data to userspace)?
> >> >> >
> >> >> > Though not sure if this is preferable to having userspace do 
> >> >> > this.
> >> >>
> >> >> It does affect other platforms too. There's no guarantee on what the
> >> >> CTC_SHIFT value would be for different platforms, so user would have to 
> >> >> at
> >> >> least query that somehow (maybe from i915). It's simpler for user to use
> >> >> the exported OA frequency since it is also backwards compatible.
> >> >
> >> > Is Option 2 above feasible since it would stop propagating the change to
> >> > various UMD's?
> >>
> >> Hmm, there is logic today that squashes context ids when doing oa buffer
> >> filtering, but it does that on selective reports (i.e. if a gem_context is
> >> passed).
> >>
> >> For this issue: for a 16MB OA buffer with 256 byte reports, that would be
> >> an additional write of 262144 in the kmd (to smem). For 20us sampled OA
> >> reports, it would be approx. 195 KB/s. Shouldn't be too much. Only 2
> >> concerns:
> >>
> >> - the mmapped use case may break, but I don't see that being upstreamed.
> >> We may have divergent solutions for upstream and internal.
> >> - blocking/polling tests in IGT will be sensitive to this change on some
> >> platforms and may need to be bolstered.
> >
> > If this correction/compensation in the kernel works out, even for internal
> > too we could do the following:
> >
> > * For non-mmaped case, do the correction in the kernel an

Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-19 Thread Dixit, Ashutosh
On Thu, 15 Sep 2022 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
>
> On Wed, Sep 14, 2022 at 04:13:41PM -0700, Umesh Nerlige Ramappa wrote:
> > On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
> >> On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
> >>> On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
>  On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
> > On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> >> With GuC mode of submission, GuC is in control of defining the
> >> context id field
> >> that is part of the OA reports. To filter reports, UMD and KMD must
> >> know what sw
> >> context id was chosen by GuC. There is not interface between KMD and
> >> GuC to
> >> determine this, so read the upper-dword of EXECLIST_STATUS to
> >> filter/squash OA
> >> reports for the specific context.
> >>
> >> Signed-off-by: Umesh Nerlige Ramappa 
> >
> >
> > I assume you checked with GuC that this doesn't change as the context
> > is running?
> 
>  Correct.
> 
> >
> > With i915/execlist submission mode, we had to ask i915 to pin the
> > sw_id/ctx_id.
> >
> 
>  From GuC perspective, the context id can change once KMD de-registers
>  the context and that will not happen while the context is in use.
> 
>  Thanks,
>  Umesh
> >>>
> >>>
> >>> Thanks Umesh,
> >>>
> >>>
> >>> Maybe I should have been more precise in my question :
> >>>
> >>>
> >>> Can the ID change while the i915-perf stream is opened?
> >>>
> >>> Because the ID not changing while the context is running makes sense.
> >>>
> >>> But since the number of available IDs is limited to 2k or something on
> >>> Gfx12, it's possible the GuC has to reuse IDs if too many apps want to
> >>> run during the period of time while i915-perf is active and filtering.
> >>>
> >>
> >> available guc ids are 64k with 4k reserved for multi-lrc, so GuC may
> >> have to reuse ids once 60k ids are used up.
> >
> > Spoke to the GuC team again and if there are a lot of contexts (> 60K)
> > running, there is a possibility of the context id being recycled. In that
> > case, the capture would be broken. I would track this as a separate JIRA
> > and follow up on a solution.
> >
> > From OA use case perspective, are we interested in monitoring just one
> > hardware context? If we make sure this context is not stolen, are we
> > good?
> >
>
> + John
>
> Based on John's inputs - if a context is pinned, then KMD does not steal
> it's id. It would just look for something else or wait for a context to be
> available (pin count 0 I believe).
>
> Since we pin the context for the duration of the OA use case, we should be
> good here.

Since this appears to be true I am thinking of okay'ing this patch rather
than define a new interface with GuC for this. Let me know if there are any
objections about this.

Thanks.
--
Ashutosh

> >>> -Lionel
> >>>
> >>>
> 
> >
> > If that's not the case then filtering is broken.
> >
> >
> > -Lionel
> >
> >


Re: [Intel-gfx] [PATCH 2/2] drm/i915/mtl: Add C6 residency support for MTL SAMedia

2022-09-19 Thread Dixit, Ashutosh
On Mon, 19 Sep 2022 05:13:18 -0700, Jani Nikula wrote:
>
> On Mon, 19 Sep 2022, Badal Nilawar  wrote:
> > For MTL SAMedia updated relevant functions and places in the code to get
> > Media C6 residency.
> >
> > v2: Fixed review comments (Ashutosh)
> >
> > Cc: Vinay Belgaumkar 
> > Cc: Ashutosh Dixit 
> > Cc: Chris Wilson 
> > Signed-off-by: Badal Nilawar 
> > ---
> >  drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c | 60 +++
> >  drivers/gpu/drm/i915/gt/intel_gt_regs.h   | 10 
> >  drivers/gpu/drm/i915/gt/intel_gt_sysfs_pm.c   |  9 ++-
> >  drivers/gpu/drm/i915/gt/intel_rc6.c   |  5 +-
> >  drivers/gpu/drm/i915/gt/selftest_rc6.c|  9 ++-
> >  drivers/gpu/drm/i915/i915_pmu.c   |  8 ++-
> >  6 files changed, 97 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c 
> > b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > index 68310881a793..053167b506a9 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm_debugfs.c
> > @@ -269,6 +269,64 @@ static int ilk_drpc(struct seq_file *m)
> > return 0;
> >  }
> >
> > +static int mtl_drpc(struct seq_file *m)
> > +{
> > +   struct intel_gt *gt = m->private;
> > +   struct intel_uncore *uncore = gt->uncore;
> > +   u32 gt_core_status, rcctl1, global_forcewake;
> > +   u32 mtl_powergate_enable = 0, mtl_powergate_status = 0;
> > +   i915_reg_t reg;
> > +
> > +   gt_core_status = intel_uncore_read(uncore, MTL_MIRROR_TARGET_WP1);
> > +
> > +   global_forcewake = intel_uncore_read(uncore, FORCEWAKE_GT_GEN9);
> > +
> > +   rcctl1 = intel_uncore_read(uncore, GEN6_RC_CONTROL);
> > +   mtl_powergate_enable = intel_uncore_read(uncore, GEN9_PG_ENABLE);
> > +   mtl_powergate_status = intel_uncore_read(uncore,
> > +GEN9_PWRGT_DOMAIN_STATUS);
> > +
> > +   seq_printf(m, "RC6 Enabled: %s\n",
> > +  str_yes_no(rcctl1 & GEN6_RC_CTL_RC6_ENABLE));
> > +   if (gt->type == GT_MEDIA) {
> > +   seq_printf(m, "Media Well Gating Enabled: %s\n",
> > +  str_yes_no(mtl_powergate_enable & 
> > GEN9_MEDIA_PG_ENABLE));
> > +   } else {
> > +   seq_printf(m, "Render Well Gating Enabled: %s\n",
> > +  str_yes_no(mtl_powergate_enable & 
> > GEN9_RENDER_PG_ENABLE));
> > +   }
> > +
> > +   seq_puts(m, "Current RC state: ");
> > +
> > +   switch ((gt_core_status & MTL_CC_MASK) >> MTL_CC_SHIFT) {
> > +   case MTL_CC0:
> > +   seq_puts(m, "on\n");
> > +   break;
> > +   case MTL_CC6:
> > +   seq_puts(m, "RC6\n");
> > +   break;
> > +   default:
> > +   seq_puts(m, "Unknown\n");
> > +   break;
> > +   }
> > +
> > +   if (gt->type == GT_MEDIA)
> > +   seq_printf(m, "Media Power Well: %s\n",
> > +  (mtl_powergate_status &
> > +   GEN9_PWRGT_MEDIA_STATUS_MASK) ? "Up" : "Down");
> > +   else
> > +   seq_printf(m, "Render Power Well: %s\n",
> > +  (mtl_powergate_status &
> > +   GEN9_PWRGT_RENDER_STATUS_MASK) ? "Up" : "Down");
> > +
> > +   reg = (gt->type == GT_MEDIA) ? MTL_MEDIA_MC6 : GEN6_GT_GFX_RC6;
> > +   print_rc6_res(m, "RC6 residency since boot:", reg);
>
> Cc: Tvrtko, Joonas, Rodrigo
>

Hi Jani,

> IMO the register is not a good abstraction to build interfaces on. I see
> that this is not where the idea is introduced, but it'll probably get
> you in trouble later on.

By "this is not where the idea is introduced" are you referring to what we
did here:

https://patchwork.freedesktop.org/patch/502372/?series=108091&rev=5

in intel_gt_perf_limit_reasons_reg()?

Or, should we follow the schema of centralizing the register selection
depending on gt type in a single function here too (since this register
selection is repeated throughout this patch)?

Thanks.
--
Ashutosh



>
> BR,
> Jani.
>
> > +
> > +   seq_printf(m, "Global Forcewake Requests: 0x%x\n", global_forcewake);
> > +
> > +   return fw_domains_show(m, NULL);
> > +}
> > +
> >  static int drpc_show(struct seq_file *m, void *unused)
> >  {
> > struct intel_gt *gt = m->private;
> > @@ -279,6 +337,8 @@ static int drpc_show(struct seq_file *m, void *unused)
> > with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> > if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915))
> > err = vlv_drpc(m);
> > +   else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
> > +   err = mtl_drpc(m);
> > else if (GRAPHICS_VER(i915) >= 6)
> > err = gen6_drpc(m);
> > else
> > diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h 
> > b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > index 7819d32db956..8a56fd873228 100644
> > --- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > +++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
> > @@ -1517,6 +1517,16 @@
> >   */
> >  #define MTL_MIRROR_TARGET_WP1

Re: [Intel-gfx] [PATCH 3/7] drm/i915/hwmon: Power PL1 limit and TDP setting

2022-09-20 Thread Dixit, Ashutosh
On Fri, 16 Sep 2022 08:00:50 -0700, Badal Nilawar wrote:
>
> diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
> b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> index e2974f928e58..bc061238e35c 100644
> --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> @@ -5,3 +5,23 @@ Contact: dri-de...@lists.freedesktop.org
>  Description: RO. Current Voltage in millivolt.
>
>   Only supported for particular Intel i915 graphics platforms.
> +
> +What:/sys/devices/.../hwmon/hwmon/power1_max
> +Date:September 2022
> +KernelVersion:   6

Maybe we should ask someone but even if we merge this today to drm-tip this
will appear in kernel.org Linus' version only in 6.2. So I think we should
set this as 6.2 on all patches.

Except for this, thanks for making the changes, this is:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 2/7] drm/i915/hwmon: Add HWMON current voltage support

2022-09-20 Thread Dixit, Ashutosh
On Thu, 15 Sep 2022 07:40:37 -0700, Nilawar, Badal wrote:
>
> On 29-08-2022 23:00, Dixit, Ashutosh wrote:
> > On Thu, 25 Aug 2022 06:21:13 -0700, Badal Nilawar wrote:
> >>
> >> +static int
> >> +hwm_in_read(struct hwm_drvdata *ddat, u32 attr, long *val)
> >> +{
> >> +  struct i915_hwmon *hwmon = ddat->hwmon;
> >> +  intel_wakeref_t wakeref;
> >> +  u32 reg_value;
> >> +
> >> +  switch (attr) {
> >> +  case hwmon_in_input:
> >> +  with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
> >> +  reg_value = intel_uncore_read(ddat->uncore, 
> >> hwmon->rg.gt_perf_status);
> >> +  /* In units of 2.5 millivolt */
> >> +  *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, 
> >> reg_value) * 25, 10);
>
> And use above scale factors here.
> *val = DIV_ROUND_CLOSEST(REG_FIELD_GET(GEN12_VOLTAGE_MASK, reg_value) *
> SF_VOLTAGE_MUL, SF_VOLTAGE_DIV);
> Regards,
> Badal
> >
> > Let's complete this comment to so that it is clear what's happening:
> >
> > /* HW register value is in units of 2.5 millivolt */

This was missed in the latest rev so if we could remember to add this that 
would be great.


Re: [Intel-gfx] [PATCH 4/7] drm/i915/hwmon: Show device level energy usage

2022-09-20 Thread Dixit, Ashutosh
On Tue, 13 Sep 2022 01:50:08 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 25/08/2022 14:21, Badal Nilawar wrote:
> > From: Dale B Stimson 
> >
> > diff --git a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon 
> > b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > index 9a2d10edfce8..03d71c6869d3 100644
> > --- a/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > +++ b/Documentation/ABI/testing/sysfs-driver-intel-i915-hwmon
> > @@ -25,3 +25,11 @@ Contact: dri-de...@lists.freedesktop.org
> >   Description:  RO. Card default power limit (default TDP setting).
> > Only supported for particular Intel i915 graphics
> > platforms.
> > +
> > +What:  /sys/devices/.../hwmon/hwmon/energy1_input
> > +Date:  June 2022
> > +KernelVersion: 5.19
>
> Date and kernel version will need updating throughout I think.
>
> But why I am here actually is to ask if there are plans to make
> intel_gpu_top support this? It would be nice to have since it reports power
> for integrated.

There were no plans but now Riana has an IGT patch series which exposes a
unified inteface for rapl/hwmon (igfx/dgfx):

https://patchwork.freedesktop.org/series/108185/

So perhaps we can either have intel_gpu_top use this IGT lib or if it
doesn't, copy some functions to intel_gpu_top.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 5/7] drm/i915/hwmon: Expose card reactive critical power

2022-09-21 Thread Dixit, Ashutosh
On Wed, 21 Sep 2022 08:07:15 -0700, Gupta, Anshuman wrote:
>

Hi Anshuman,

> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 55c35903adca..956e5298ef1e 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -6644,6 +6644,12 @@
> >   #define   DG1_PCODE_STATUS0x7E
> >   #define DG1_UNCORE_GET_INIT_STATUS0x0
> >   #define DG1_UNCORE_INIT_STATUS_COMPLETE   0x1
> > +#define   PCODE_POWER_SETUP0x7C
> > +#define POWER_SETUP_SUBCOMMAND_READ_I1 0x4
> > +#define POWER_SETUP_SUBCOMMAND_WRITE_I10x5
> > +#definePOWER_SETUP_I1_WATTSREG_BIT(31)
> > +#definePOWER_SETUP_I1_SHIFT6   /* 10.6 fixed 
> > point format */
> Could please add some comment to explain, why POWER_SETUP_I1_SHIFT  = 6,
> what is excatly 10.6 fixed point format ?
> With that.
> Reviewed-by: Anshuman Gupta 

10.6 fixed point format means a 16 bit number is represented as x.y where x
are the top 10 bits and y are the bottom 6 bits. The float value of this 16
bit number is (x + (y / 2^6)), so (x + (y / 64)). For example the number
0x8008 will have the value (1 * 2^9 + 8 / 2^6) == 512.125. Note that the
hexadecimal number 0x8008 == 32776 and 512.125 == 32776 / 64 which is why
POWER_SETUP_I1_SHIFT is 6 (2^6 == 64).

Similarly, the 8.8 fixed point format is explained in
gt/intel_gt_sysfs_pm.c. Do you think this needs a comment? I thought "10.6
fixed point format" is a sufficient hint (fixed point numbers are fairly
well known).

An even trickier data format is in the patch "drm/i915/hwmon: Expose
power1_max_interval" in hwm_power1_max_interval_show() but I think I have a
long comment there.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-21 Thread Dixit, Ashutosh
On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
>
> On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> >
> > +/*
> > + * For execlist mode of submission, pick an unused context id
> > + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> > + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> > + *
> > + * For GuC mode of submission read context id from the upper dword of the
> > + * EXECLIST_STATUS register.
> > + */
> > +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> > +{
> > +   u32 ctx_id, mask;
> > +   int ret;
> > +
> > +   if (intel_engine_uses_guc(stream->engine)) {
> > +   ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> > +   if (ret)
> > +   return ret;
> > +
> > +   mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> > +   (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> > +   } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> > +   ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> > +   (XEHP_SW_CTX_ID_SHIFT - 32);
> > +
> > +   mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> > +   (XEHP_SW_CTX_ID_SHIFT - 32);
> > +   } else {
> > +   ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> > +(GEN11_SW_CTX_ID_SHIFT - 32);
> > +
> > +   mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> > +   (GEN11_SW_CTX_ID_SHIFT - 32);
>
> Previously I missed that these ctx_id's for non-GuC cases are just
> constants. How does it work in these cases?

For the record, offline reply from Umesh for this question:

Looks like the SW context id is set to a unique value by the KMD for
execlist mode here - __execlists_schedule_in() as ccid. Later it is written
to the execlist port here (as lrc.desc) - execlists_submit_ports(). It's
just a unique value that the kmd determines. For OA we are setting a
ce->tag when OA use case is active and it used by the
__execlists_schedule_in().

Related commit from Chris - 2935ed5339c49

I think the reason why OA is setting it is because this value is not
assigned until __execlists_schedule_in() is called. For OA context, this
may happen much later. The code that Chris has added, just assigns a value
in OA and then uses it later in the __execlists_schedule_in() path.


Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-21 Thread Dixit, Ashutosh
On Wed, 21 Sep 2022 20:44:57 -0700, Dixit, Ashutosh wrote:
>
> On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
> >
> > On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> > >
> > > +/*
> > > + * For execlist mode of submission, pick an unused context id
> > > + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> > > + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> > > + *
> > > + * For GuC mode of submission read context id from the upper dword of the
> > > + * EXECLIST_STATUS register.
> > > + */
> > > +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> > > +{
> > > + u32 ctx_id, mask;
> > > + int ret;
> > > +
> > > + if (intel_engine_uses_guc(stream->engine)) {
> > > + ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> > > + (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> > > + } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> > > + ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> > > + (XEHP_SW_CTX_ID_SHIFT - 32);
> > > +
> > > + mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> > > + (XEHP_SW_CTX_ID_SHIFT - 32);
> > > + } else {
> > > + ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> > > +  (GEN11_SW_CTX_ID_SHIFT - 32);
> > > +
> > > + mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> > > + (GEN11_SW_CTX_ID_SHIFT - 32);
> >
> > Previously I missed that these ctx_id's for non-GuC cases are just
> > constants. How does it work in these cases?
>
> For the record, offline reply from Umesh for this question:
>
> Looks like the SW context id is set to a unique value by the KMD for
> execlist mode here - __execlists_schedule_in() as ccid. Later it is written
> to the execlist port here (as lrc.desc) - execlists_submit_ports(). It's
> just a unique value that the kmd determines. For OA we are setting a
> ce->tag when OA use case is active and it used by the
> __execlists_schedule_in().
>
> Related commit from Chris - 2935ed5339c49
>
> I think the reason why OA is setting it is because this value is not
> assigned until __execlists_schedule_in() is called. For OA context, this
> may happen much later. The code that Chris has added, just assigns a value
> in OA and then uses it later in the __execlists_schedule_in() path.

I would still think this should not be a constant value but something which
depends on the context or the context id. Anyway since this is a
pre-existing issue not introducd in this patch, I will disregard this and
continue reviewing this patch.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

2022-09-21 Thread Dixit, Ashutosh
On Mon, 19 Sep 2022 20:22:40 -0700, Dixit, Ashutosh wrote:
>
> On Thu, 15 Sep 2022 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
> >
> > On Wed, Sep 14, 2022 at 04:13:41PM -0700, Umesh Nerlige Ramappa wrote:
> > > On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
> > >> On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
> > >>> On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
> > >>>> On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
> > >>>>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> > >>>>>> With GuC mode of submission, GuC is in control of defining the
> > >>>>>> context id field
> > >>>>>> that is part of the OA reports. To filter reports, UMD and KMD must
> > >>>>>> know what sw
> > >>>>>> context id was chosen by GuC. There is not interface between KMD and
> > >>>>>> GuC to
> > >>>>>> determine this, so read the upper-dword of EXECLIST_STATUS to
> > >>>>>> filter/squash OA
> > >>>>>> reports for the specific context.
> > >>>>>>
> > >>>>>> Signed-off-by: Umesh Nerlige Ramappa 
> > >>>>>> 
> > >>>>>
> > >>>>>
> > >>>>> I assume you checked with GuC that this doesn't change as the context
> > >>>>> is running?
> > >>>>
> > >>>> Correct.
> > >>>>
> > >>>>>
> > >>>>> With i915/execlist submission mode, we had to ask i915 to pin the
> > >>>>> sw_id/ctx_id.
> > >>>>>
> > >>>>
> > >>>> From GuC perspective, the context id can change once KMD de-registers
> > >>>> the context and that will not happen while the context is in use.
> > >>>>
> > >>>> Thanks,
> > >>>> Umesh
> > >>>
> > >>>
> > >>> Thanks Umesh,
> > >>>
> > >>>
> > >>> Maybe I should have been more precise in my question :
> > >>>
> > >>>
> > >>> Can the ID change while the i915-perf stream is opened?
> > >>>
> > >>> Because the ID not changing while the context is running makes sense.
> > >>>
> > >>> But since the number of available IDs is limited to 2k or something on
> > >>> Gfx12, it's possible the GuC has to reuse IDs if too many apps want to
> > >>> run during the period of time while i915-perf is active and filtering.
> > >>>
> > >>
> > >> available guc ids are 64k with 4k reserved for multi-lrc, so GuC may
> > >> have to reuse ids once 60k ids are used up.
> > >
> > > Spoke to the GuC team again and if there are a lot of contexts (> 60K)
> > > running, there is a possibility of the context id being recycled. In that
> > > case, the capture would be broken. I would track this as a separate JIRA
> > > and follow up on a solution.
> > >
> > > From OA use case perspective, are we interested in monitoring just one
> > > hardware context? If we make sure this context is not stolen, are we
> > > good?
> > >
> >
> > + John
> >
> > Based on John's inputs - if a context is pinned, then KMD does not steal
> > it's id. It would just look for something else or wait for a context to be
> > available (pin count 0 I believe).
> >
> > Since we pin the context for the duration of the OA use case, we should be
> > good here.
>
> Since this appears to be true I am thinking of okay'ing this patch rather
> than define a new interface with GuC for this. Let me know if there are any
> objections about this.

With the above comments/assumptions this is:

Reviewed-by: Ashutosh Dixit 


Re: [Intel-gfx] [PATCH 3/7] drm/i915/hwmon: Power PL1 limit and TDP setting

2022-09-22 Thread Dixit, Ashutosh
On Thu, 22 Sep 2022 00:08:46 -0700, Gupta, Anshuman wrote:
>

Hi Anshuman,

> On 9/21/2022 8:23 PM, Nilawar, Badal wrote:
> >
> > On 21-09-2022 17:15, Gupta, Anshuman wrote:
> >>
> >>> +static int
> >>> +hwm_power_read(struct hwm_drvdata *ddat, u32 attr, int chan, long *val)
> >>> +{
> >>> +    struct i915_hwmon *hwmon = ddat->hwmon;
> >>> +
> >>> +    switch (attr) {
> >>> +    case hwmon_power_max:
> >>> +    *val = hwm_field_read_and_scale(ddat,
> >>> +    hwmon->rg.pkg_rapl_limit,
> >>> +    PKG_PWR_LIM_1,
> >>> +    hwmon->scl_shift_power,
> >>> +    SF_POWER);
> >>> +    return 0;
> >>> +    case hwmon_power_rated_max:
> >>> +    *val = hwm_field_read_and_scale(ddat,
> >>> +    hwmon->rg.pkg_power_sku,
> >>> +    PKG_PKG_TDP,It seems a dead code,
> >>> pkg_power_sky register in initialized with
> >> INVALID_MMMIO_REG, why are we exposing this, unless i am missing
> >> something ?
> > Agree that for platforms considered in this series does not support
> > hwmon_power_rated_max. In fact hwm_power_is_visible will not allow to
> > create sysfs entry if pkg_power_sku is not supported. Considering future
> > dgfx platforms we didn't remove this entry. In future for supported
> > platforms we just need to assign valid register to pkg_power_sku.
>
> AFAIU PACKAGE_POWER_SKU reg is valid for both DG1 and DG2 from BSpec:51862
> So we need to define the register.
> See once more comment below,

Thanks for pointing out, I didn't know where to look for it. We will add
it. Thanks to Badal for locating the register too.

> >
> > Regards,
> > Badal
> >> Br,
> >> Anshuman.
> >>> +    hwmon->scl_shift_power,
> >>> +    SF_POWER);
> >>> +    return 0;
> >>> +    default:
> >>> +    return -EOPNOTSUPP;
> >>> +    }
> >>> +}
> >>> +
> >>> +static int
> /snip
> >>> diff --git a/drivers/gpu/drm/i915/i915_reg.h
> >>> b/drivers/gpu/drm/i915/i915_reg.h
> >>> index 1a9bd829fc7e..55c35903adca 100644
> >>> --- a/drivers/gpu/drm/i915/i915_reg.h
> >>> +++ b/drivers/gpu/drm/i915/i915_reg.h
> >>> @@ -1807,6 +1807,11 @@
> >>>   #define   POWER_LIMIT_1_MASK    REG_BIT(10)
> >>>   #define   POWER_LIMIT_2_MASK    REG_BIT(11)
> >>> +/*
> >>> + * *_PACKAGE_POWER_SKU - SKU power and timing parameters.
> >>> + */
> >>> +#define   PKG_PKG_TDP    GENMASK_ULL(14, 0)
> Define register above this definition, GENMASK should follow
> by a register.

Will do.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 6/7] drm/i915/hwmon: Expose power1_max_interval

2022-09-22 Thread Dixit, Ashutosh
On Thu, 22 Sep 2022 00:13:00 -0700, Gupta, Anshuman wrote:
>

Hi Anshuman,

> > +static ssize_t
> > +hwm_power1_max_interval_store(struct device *dev,
> > + struct device_attribute *attr,
> > + const char *buf, size_t count)
> > +{
> > +   struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > +   struct i915_hwmon *hwmon = ddat->hwmon;
> > +   long val, max_win, ret;
> > +   u32 x, y, rxy, x_w = 2; /* 2 bits */
> > +   u64 tau4, r;
> > +
> > +#define PKG_MAX_WIN_DEFAULT 0x12ull
> > +
> > +   ret = kstrtoul(buf, 0, &val);
> > +   if (ret)
> > +   return ret;
> > +
> > +   /*
> > +* val must be < max in hwmon interface units. The steps below are
> > +* explained in i915_power1_max_interval_show()
> > +*/
> > +   r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
>
> AFAIU we need to read r from PACKAGE_POWER_SKU reg untill unless it has
> some known issue?

The platform on which I tried had an incorrect value (that is why I didn't
read it from PACKAGE_POWER_SKU) but let me investigate it some more for
other platforms and get back.

> > +   x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
> > +   y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
> > +   tau4 = ((1 << x_w) | x) << y;
> > +   max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
> > +
> > +   if (val > max_win)
> > +   return -EINVAL;
> > +
> > +   /* val in hw units */
> > +   val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
> > +   /* Convert to 1.x * power(2,y) */
> > +   if (!val)
> > +   return -EINVAL;
> > +   y = ilog2(val);
> > +   /* x = (val - (1 << y)) >> (y - 2); */
> > +   x = (val - (1ul << y)) << x_w >> y;
> > +
> > +   rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | 
> > REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
> > +
> > +   hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
> > +   PKG_PWR_LIM_1_TIME, rxy);
> > +   return count;
> > +}
> > +
> /snip
> > if (IS_ERR(hwmon_dev)) {
> > mutex_destroy(&hwmon->hwmon_lock);
> > i915->hwmon = NULL;
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > b/drivers/gpu/drm/i915/i915_reg.h
> > index 956e5298ef1e..68e7cc85dc53 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -1811,6 +1811,9 @@
> >* *_PACKAGE_POWER_SKU - SKU power and timing parameters.
> >*/
> >   #define   PKG_PKG_TDP GENMASK_ULL(14, 0)
> > +#define   PKG_MAX_WIN  GENMASK_ULL(54, 48)
> > +#define PKG_MAX_WIN_X  GENMASK_ULL(54, 53)
> > +#define PKG_MAX_WIN_Y  GENMASK_ULL(52, 48)
> These GENMASK fields needs a reg definition.

Yes this is the same _PACKAGE_POWER_SKU register so should get fixed when
we add it in Patch 3.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 6/7] drm/i915/hwmon: Expose power1_max_interval

2022-09-22 Thread Dixit, Ashutosh
On Thu, 22 Sep 2022 19:51:45 -0700, Dixit, Ashutosh wrote:
>
> On Thu, 22 Sep 2022 00:13:00 -0700, Gupta, Anshuman wrote:
> >
>
> Hi Anshuman,
>
> > > +static ssize_t
> > > +hwm_power1_max_interval_store(struct device *dev,
> > > +   struct device_attribute *attr,
> > > +   const char *buf, size_t count)
> > > +{
> > > + struct hwm_drvdata *ddat = dev_get_drvdata(dev);
> > > + struct i915_hwmon *hwmon = ddat->hwmon;
> > > + long val, max_win, ret;
> > > + u32 x, y, rxy, x_w = 2; /* 2 bits */
> > > + u64 tau4, r;
> > > +
> > > +#define PKG_MAX_WIN_DEFAULT 0x12ull
> > > +
> > > + ret = kstrtoul(buf, 0, &val);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + /*
> > > +  * val must be < max in hwmon interface units. The steps below are
> > > +  * explained in i915_power1_max_interval_show()
> > > +  */
> > > + r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);
> >
> > AFAIU we need to read r from PACKAGE_POWER_SKU reg untill unless it has
> > some known issue?
>
> The platform on which I tried had an incorrect value (that is why I didn't
> read it from PACKAGE_POWER_SKU) but let me investigate it some more for
> other platforms and get back.

I checked, the value is correct on DG1/DG2 which have a valid
PACKAGE_POWER_SKU (XEHPSDV does not have a valid
PACKAGE_POWER_SKU). Therefore the one line above should be replaced with
the code below:

if (i915_mmio_reg_valid(hwmon->rg.pkg_power_sku))
with_intel_runtime_pm(ddat->uncore->rpm, wakeref)
r = intel_uncore_read64(ddat->uncore, 
hwmon->rg.pkg_power_sku);
else
r = FIELD_PREP(PKG_MAX_WIN, PKG_MAX_WIN_DEFAULT);

> > > + x = REG_FIELD_GET(PKG_MAX_WIN_X, r);
> > > + y = REG_FIELD_GET(PKG_MAX_WIN_Y, r);
> > > + tau4 = ((1 << x_w) | x) << y;
> > > + max_win = mul_u64_u32_shr(tau4, SF_TIME, hwmon->scl_shift_time + x_w);
> > > +
> > > + if (val > max_win)
> > > + return -EINVAL;
> > > +
> > > + /* val in hw units */
> > > + val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
> > > + /* Convert to 1.x * power(2,y) */
> > > + if (!val)
> > > + return -EINVAL;
> > > + y = ilog2(val);
> > > + /* x = (val - (1 << y)) >> (y - 2); */
> > > + x = (val - (1ul << y)) << x_w >> y;
> > > +
> > > + rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | 
> > > REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
> > > +
> > > + hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
> > > + PKG_PWR_LIM_1_TIME, rxy);
> > > + return count;
> > > +}
> > > +
> > /snip
> > >   if (IS_ERR(hwmon_dev)) {
> > >   mutex_destroy(&hwmon->hwmon_lock);
> > >   i915->hwmon = NULL;
> > > diff --git a/drivers/gpu/drm/i915/i915_reg.h 
> > > b/drivers/gpu/drm/i915/i915_reg.h
> > > index 956e5298ef1e..68e7cc85dc53 100644
> > > --- a/drivers/gpu/drm/i915/i915_reg.h
> > > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > > @@ -1811,6 +1811,9 @@
> > >* *_PACKAGE_POWER_SKU - SKU power and timing parameters.
> > >*/
> > >   #define   PKG_PKG_TDP   GENMASK_ULL(14, 0)
> > > +#define   PKG_MAX_WINGENMASK_ULL(54, 48)
> > > +#define PKG_MAX_WIN_XGENMASK_ULL(54, 53)
> > > +#define PKG_MAX_WIN_YGENMASK_ULL(52, 48)
> > These GENMASK fields needs a reg definition.
>
> Yes this is the same _PACKAGE_POWER_SKU register so should get fixed when
> we add it in Patch 3.

Looks like PCU_PACKAGE_POWER_SKU for DG1/DG2 will need to be declared in
intel_mchbar_regs.h so these fields will need to also move there (in
Patch 3).

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915: Update doc for __intel_wakeref_put()

2023-01-05 Thread Dixit, Ashutosh
On Thu, 05 Jan 2023 07:38:31 -0800, Nirmoy Das wrote:
>

Hi Nirmoy,

> Fix the __intel_wakeref_put() doc to reflect current behaviour.
>
> Fixes: c7302f204490 ("drm/i915: Defer final intel_wakeref_put to process 
> context")

Seems to be d91e657876a9?

> Signed-off-by: Nirmoy Das 
> ---
>  drivers/gpu/drm/i915/intel_wakeref.h | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h 
> b/drivers/gpu/drm/i915/intel_wakeref.h
> index 4f4c2e15e736..b5e1c61b5003 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -135,14 +135,12 @@ intel_wakeref_might_get(struct intel_wakeref *wf)
>   * @flags: control flags
>   *
>   * Release our hold on the wakeref. When there are no more users,
> - * the runtime pm wakeref will be released after the @fn callback is called
> - * underneath the wakeref mutex.
> + * the runtime pm wakeref will be released after the intel_wakeref_ops->put()
> + * callback is called underneath the wakeref mutex.
>   *
> - * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
> - * is retained and an error reported.
> + * Note that intel_wakeref_ops->put() is allowed to fail, in which case the
> + * runtime-pm wakeref is retained.

@fn is used in multiple places in this file (including some 'get' usages)
so maybe needs to be fixed there too. Maybe better to just say somewhere
@fn refers to ops->get()/put() where applicable?

>   *
> - * Returns: 0 if the wakeref was released successfully, or a negative error
> - * code otherwise.

So this seems to be the reason for the patch...

>   */
>  static inline void
>  __intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
> --
> 2.38.0
>

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915: Update docs in intel_wakeref.h

2023-01-05 Thread Dixit, Ashutosh
On Thu, 05 Jan 2023 12:38:43 -0800, Nirmoy Das wrote:
>
> Fix docs for __intel_wakeref_put() and intel_wakeref_get() to
> reflect current behaviour.

Reviewed-by: Ashutosh Dixit 

> Signed-off-by: Nirmoy Das 
> ---
>  drivers/gpu/drm/i915/intel_wakeref.h | 21 ++---
>  1 file changed, 10 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_wakeref.h 
> b/drivers/gpu/drm/i915/intel_wakeref.h
> index 4f4c2e15e736..71b8a63f6f10 100644
> --- a/drivers/gpu/drm/i915/intel_wakeref.h
> +++ b/drivers/gpu/drm/i915/intel_wakeref.h
> @@ -68,11 +68,12 @@ void __intel_wakeref_put_last(struct intel_wakeref *wf, 
> unsigned long flags);
>   * @wf: the wakeref
>   *
>   * Acquire a hold on the wakeref. The first user to do so, will acquire
> - * the runtime pm wakeref and then call the @fn underneath the wakeref
> - * mutex.
> + * the runtime pm wakeref and then call the intel_wakeref_ops->get()
> + * underneath the wakeref mutex.
>   *
> - * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
> - * will be released and the acquisition unwound, and an error reported.
> + * Note that intel_wakeref_ops->get() is allowed to fail, in which case
> + * the runtime-pm wakeref will be released and the acquisition unwound,
> + * and an error reported.
>   *
>   * Returns: 0 if the wakeref was acquired successfully, or a negative error
>   * code otherwise.
> @@ -130,19 +131,17 @@ intel_wakeref_might_get(struct intel_wakeref *wf)
>  }
>
>  /**
> - * intel_wakeref_put_flags: Release the wakeref
> + * __intel_wakeref_put: Release the wakeref
>   * @wf: the wakeref
>   * @flags: control flags
>   *
>   * Release our hold on the wakeref. When there are no more users,
> - * the runtime pm wakeref will be released after the @fn callback is called
> - * underneath the wakeref mutex.
> + * the runtime pm wakeref will be released after the intel_wakeref_ops->put()
> + * callback is called underneath the wakeref mutex.
>   *
> - * Note that @fn is allowed to fail, in which case the runtime-pm wakeref
> - * is retained and an error reported.
> + * Note that intel_wakeref_ops->put() is allowed to fail, in which case the
> + * runtime-pm wakeref is retained.
>   *
> - * Returns: 0 if the wakeref was released successfully, or a negative error
> - * code otherwise.
>   */
>  static inline void
>  __intel_wakeref_put(struct intel_wakeref *wf, unsigned long flags)
> --
> 2.38.0
>


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0

2023-01-12 Thread Dixit, Ashutosh
On Thu, 12 Jan 2023 18:27:52 -0800, Vinay Belgaumkar wrote:
>
> Reading current root sysfs entries gives a min/max of all
> GTs. Updating this so we return default (GT0) values when root
> level sysfs entries are accessed, instead of min/max for the card.
> Tests that are not multi GT capable will read incorrect sysfs
> values without this change on multi-GT platforms like MTL.
>
> Fixes: a8a4f0467d70 ("drm/i915: Fix CFI violations in gt_sysfs")

We seem to be proposing to change the previous sysfs ABI with this patch?
But even then it doesn't seem correct to use gt0 values for device level
sysfs. Actually I received the following comment about using max freq
across gt's for device level freq's (gt_act_freq_mhz etc.) from one of our
users:

-
On Sun, 06 Nov 2022 08:54:04 -0800, Lawson, Lowren H wrote:

Why show maximum? Wouldn’t average be more accurate to the user experience?

As a user, I expect the ‘card’ frequency to be relatively accurate to the
entire card. If I see 1.6GHz, but the card is behaving as if it’s running a
1.0 & 1.6GHz on the different compute tiles, I’m going to see a massive
decrease in compute workload performance while at ‘maximum’ frequency.
-

So I am not sure why max/min were previously chosen. Why not the average?

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH] drm/i915/mtl: Connect root sysfs entries to GT0

2023-01-12 Thread Dixit, Ashutosh
On Thu, 12 Jan 2023 20:26:34 -0800, Belgaumkar, Vinay wrote:
>
> I think the ABI was changed by the patch mentioned in the commit
> (a8a4f0467d70).

The ABI was originally changed in 80cf8af17af04 and 56a709cf77468.


Re: [PATCH] drm/i915: use IS_ENABLED() instead of defined() on config options

2024-09-04 Thread Dixit, Ashutosh
On Wed, 04 Sep 2024 07:52:18 -0700, Jani Nikula wrote:
>
> Prefer IS_ENABLED() instead of defined() for checking whether a kconfig
> option is enabled.

Reviewed-by: Ashutosh Dixit 

>
> Signed-off-by: Jani Nikula 
> ---
>  drivers/gpu/drm/i915/display/intel_display_irq.c | 2 +-
>  drivers/gpu/drm/i915/display/intel_dp_tunnel.h   | 2 +-
>  drivers/gpu/drm/i915/gem/i915_gem_pm.c   | 2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c| 2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_guc_log.c   | 2 +-
>  drivers/gpu/drm/i915/gt/uc/intel_huc.c   | 2 +-
>  drivers/gpu/drm/i915/i915_trace.h| 2 +-
>  drivers/gpu/drm/i915/i915_utils.h| 2 +-
>  drivers/gpu/drm/i915/selftests/mock_gem_device.c | 4 ++--
>  9 files changed, 10 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/display/intel_display_irq.c 
> b/drivers/gpu/drm/i915/display/intel_display_irq.c
> index 73369847ed66..9c0b83abbe3c 100644
> --- a/drivers/gpu/drm/i915/display/intel_display_irq.c
> +++ b/drivers/gpu/drm/i915/display/intel_display_irq.c
> @@ -298,7 +298,7 @@ void i915_enable_asle_pipestat(struct drm_i915_private 
> *dev_priv)
>   spin_unlock_irq(&dev_priv->irq_lock);
>  }
>
> -#if defined(CONFIG_DEBUG_FS)
> +#if IS_ENABLED(CONFIG_DEBUG_FS)
>  static void display_pipe_crc_irq_handler(struct drm_i915_private *dev_priv,
>enum pipe pipe,
>u32 crc0, u32 crc1,
> diff --git a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h 
> b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
> index a0c00b7d3303..e9314cf25a19 100644
> --- a/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
> +++ b/drivers/gpu/drm/i915/display/intel_dp_tunnel.h
> @@ -20,7 +20,7 @@ struct intel_dp;
>  struct intel_encoder;
>  struct intel_link_bw_limits;
>
> -#if defined(CONFIG_DRM_I915_DP_TUNNEL) && defined(I915)
> +#if IS_ENABLED(CONFIG_DRM_I915_DP_TUNNEL) && defined(I915)
>
>  int intel_dp_tunnel_detect(struct intel_dp *intel_dp, struct 
> drm_modeset_acquire_ctx *ctx);
>  void intel_dp_tunnel_disconnect(struct intel_dp *intel_dp);
> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c 
> b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> index 3b27218aabe2..900c08337942 100644
> --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
> @@ -13,7 +13,7 @@
>  #include "i915_driver.h"
>  #include "i915_drv.h"
>
> -#if defined(CONFIG_X86)
> +#if IS_ENABLED(CONFIG_X86)
>  #include 
>  #else
>  #define wbinvd_on_all_cpus() \
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> index 23f54c84cbab..fe53e8eccf4b 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c
> @@ -145,7 +145,7 @@ static inline bool guc_load_done(struct intel_uncore 
> *uncore, u32 *status, bool
>   * an end user should hit the timeout is in case of extreme thermal 
> throttling.
>   * And a system that is that hot during boot is probably dead anyway!
>   */
> -#if defined(CONFIG_DRM_I915_DEBUG_GEM)
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>  #define GUC_LOAD_RETRY_LIMIT 20
>  #else
>  #define GUC_LOAD_RETRY_LIMIT 3
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> index bf16351c9349..222c95f62156 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_log.c
> @@ -14,7 +14,7 @@
>  #include "intel_guc_log.h"
>  #include "intel_guc_print.h"
>
> -#if defined(CONFIG_DRM_I915_DEBUG_GUC)
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GUC)
>  #define GUC_LOG_DEFAULT_CRASH_BUFFER_SIZESZ_2M
>  #define GUC_LOG_DEFAULT_DEBUG_BUFFER_SIZESZ_16M
>  #define GUC_LOG_DEFAULT_CAPTURE_BUFFER_SIZE  SZ_1M
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c 
> b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> index 2d9152eb7282..d7ac31c3254c 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
> @@ -455,7 +455,7 @@ static const char *auth_mode_string(struct intel_huc *huc,
>   * an end user should hit the timeout is in case of extreme thermal 
> throttling.
>   * And a system that is that hot during boot is probably dead anyway!
>   */
> -#if defined(CONFIG_DRM_I915_DEBUG_GEM)
> +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
>  #define HUC_LOAD_RETRY_LIMIT   20
>  #else
>  #define HUC_LOAD_RETRY_LIMIT   3
> diff --git a/drivers/gpu/drm/i915/i915_trace.h 
> b/drivers/gpu/drm/i915/i915_trace.h
> index ce1cbee1b39d..09d89bdf82f4 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -322,7 +322,7 @@ DEFINE_EVENT(i915_request, i915_request_add,
>TP_ARGS(rq)
>  );
>
> -#if defined(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
> +#if IS_ENABLED(CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS)
>  DEFINE_EVENT(i915_request, i915_request_guc_submit,
>TP_PROTO(struct i915_request *

<    1   2   3   4   5   6