Re: [Intel-gfx] [PATCH 5/9] drm/i915/pmu: Prepare for multi-tile non-engine counters

2023-03-31 Thread Tvrtko Ursulin



On 30/03/2023 23:28, Dixit, Ashutosh wrote:

On Thu, 30 Mar 2023 05:39:04 -0700, Tvrtko Ursulin wrote:




Hi Tvrtko,


diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 1b04c79907e8..a708e44a227e 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -38,13 +38,16 @@ enum {
__I915_NUM_PMU_SAMPLERS
   };
   +#define I915_PMU_MAX_GTS (4) /* FIXME */


3-4 years since writing this I have no idea what I meant by this
FIXME. Should have put a better comment.. :( It was early platform
enablement times so it was somewhat passable, but now I think we need to
figure out what I actually meant. Maybe removing the comment is fine.


diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..bbab7f3dbeb4 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -280,7 +280,17 @@ enum drm_i915_pmu_engine_sample {
   #define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
   -#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 +
(x))
+/*
+ * Top 8 bits of every non-engine counter are GT id.
+ * FIXME: __I915_PMU_GT_SHIFT will be changed to 56
+ */


I asked before and don't think I got an answer: Why is 4 bits not enough
for gt id? The comment is not my code I am pretty sure.


Both of the above FIXME's are the work of yours truly :-) (added during
PRELIM work).


Very kind of you but I think first one is mine. ;) I can find it in my 
local branch dating from at least June 2020.


I had an idea that maybe it was supposed to mean I wanted to reuse the 
I915_MAX_GT define and not duplicate a '4' here. Perhaps there was some 
header mess which made me give up at the time.


I think it is worth trying that now, maybe something changed.


Anyway given that now i915 will not support new product generations I think
we can just drop the FIXME's. Otherwise I was saying since we are only
using a few bottom bits, why not future proof things a bit and allow for
num_gt's to expand beyond 16.


Oh right.. I thought 16 GTs would be enough, but I also don't think I mind 
if it is 4 or 8 bits. Possibly at the time, as I was seeing more and 
more counters getting added, or rather classes of counters, I was 
starting to get wary of running out of bits for future expansion. All of 
those were done by segmenting the numerical space, not bitwise, so 
perhaps the concern shouldn't have been there and 8 is also fine. Don't 
know really, don't think I have a strong opinion. Let's pick one and drop 
the FIXME comment.


Regards,

Tvrtko



So for now just drop the FIXME's for i915, revisit if needed with xe.





Re: [Intel-gfx] [PATCH 5/9] drm/i915/pmu: Prepare for multi-tile non-engine counters

2023-03-30 Thread Dixit, Ashutosh
On Thu, 30 Mar 2023 05:39:04 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h 
> > b/drivers/gpu/drm/i915/i915_pmu.h
> > index 1b04c79907e8..a708e44a227e 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -38,13 +38,16 @@ enum {
> > __I915_NUM_PMU_SAMPLERS
> >   };
> >   +#define I915_PMU_MAX_GTS (4) /* FIXME */
>
> 3-4 years since writing this I have no idea what I meant by this
> FIXME. Should have put a better comment.. :( It was early platform
> enablement times so it was somewhat passable, but now I think we need to
> figure out what I actually meant. Maybe removing the comment is fine.
>
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index dba7c5a5b25e..bbab7f3dbeb4 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -280,7 +280,17 @@ enum drm_i915_pmu_engine_sample {
> >   #define I915_PMU_ENGINE_SEMA(class, instance) \
> > __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
> >   -#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 +
> > (x))
> > +/*
> > + * Top 8 bits of every non-engine counter are GT id.
> > + * FIXME: __I915_PMU_GT_SHIFT will be changed to 56
> > + */
>
> I asked before and don't think I got an answer: Why is 4 bits not enough
> for gt id? The comment is not my code I am pretty sure.

Both of the above FIXME's are the work of yours truly :-) (added during
PRELIM work).

Anyway given that now i915 will not support new product generations I think
we can just drop the FIXME's. Otherwise I was saying since we are only
using a few bottom bits, why not future proof things a bit and allow for
num_gt's to expand beyond 16.

So for now just drop the FIXME's for i915, revisit if needed with xe.

Thanks.
--
Ashutosh


Re: [Intel-gfx] [PATCH 5/9] drm/i915/pmu: Prepare for multi-tile non-engine counters

2023-03-30 Thread Tvrtko Ursulin



On 30/03/2023 01:40, Umesh Nerlige Ramappa wrote:

From: Tvrtko Ursulin 

Reserve some bits in the counter config namespace which will carry the
tile id and prepare the code to handle this.

No per tile counters have been added yet.

Signed-off-by: Tvrtko Ursulin 
---
  drivers/gpu/drm/i915/i915_pmu.c | 153 +++-
  drivers/gpu/drm/i915/i915_pmu.h |   9 +-
  include/uapi/drm/i915_drm.h |  18 +++-
  3 files changed, 132 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c00b94c7f509..5d1de98d86b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -56,11 +56,21 @@ static bool is_engine_config(u64 config)
return config < __I915_PMU_OTHER(0);
  }
  
+static unsigned int config_gt_id(const u64 config)

+{
+   return config >> __I915_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+   return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
+}
+
  static unsigned int other_bit(const u64 config)
  {
unsigned int val;
  
-	switch (config) {

+   switch (config_counter(config)) {
case I915_PMU_ACTUAL_FREQUENCY:
val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
break;
@@ -78,15 +88,20 @@ static unsigned int other_bit(const u64 config)
return -1;
}
  
-	return I915_ENGINE_SAMPLE_COUNT + val;

+   return I915_ENGINE_SAMPLE_COUNT +
+  config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
+  val;
  }
  
  static unsigned int config_bit(const u64 config)

  {
-   if (is_engine_config(config))
+   if (is_engine_config(config)) {
+   GEM_BUG_ON(config_gt_id(config));
+
return engine_config_sample(config);
-   else
+   } else {
return other_bit(config);
+   }
  }
  
  static u64 config_mask(u64 config)

@@ -104,6 +119,18 @@ static unsigned int event_bit(struct perf_event *event)
return config_bit(event->attr.config);
  }
  
+static u64 frequency_enabled_mask(void)

+{
+   unsigned int i;
+   u64 mask = 0;
+
+   for (i = 0; i < I915_PMU_MAX_GTS; i++)
+   mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
+   config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
+
+   return mask;
+}
+
  static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
  {
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
@@ -120,9 +147,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
 * Mask out all the ones which do not need the timer, or in
 * other words keep all the ones that could need the timer.
 */
-   enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
- config_mask(I915_PMU_REQUESTED_FREQUENCY) |
- ENGINE_SAMPLE_MASK;
+   enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
  
  	/*

 * When the GPU is idle per-engine counters do not need to be
@@ -164,9 +189,39 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
  }
  
+static unsigned int

+__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
+
+   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
+
+   return idx;
+}
+
+static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+}
+
+static void
+store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
+{
+   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+}
+
+static void
+add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val,
+   u32 mul)
+{
+   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur +=
+   mul_u32_u32(val, mul);
+}
+
  static u64 get_rc6(struct intel_gt *gt)
  {
struct drm_i915_private *i915 = gt->i915;
+   const unsigned int gt_id = gt->info.id;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
bool awake = false;
@@ -181,7 +236,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(&pmu->lock, flags);
  
  	if (awake) {

-   pmu->sample[__I915_SAMPLE_RC6].cur = val;
+   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
} else {
/*
 * We think we are runtime suspended.
@@ -190,14 +245,14 @@ static u64 get_rc6(struct intel_gt *gt)
 * on top of the last known real value, as the approximated RC6
 * counter value.
 */
-   val = ktime_since_raw(pmu->sleep_last);
-   val += pmu->sample[__I915_SAMPLE_RC6].cur;
+   val = ktime_since_raw(pmu->sleep_last[gt_id]);
+   

[Intel-gfx] [PATCH 5/9] drm/i915/pmu: Prepare for multi-tile non-engine counters

2023-03-29 Thread Umesh Nerlige Ramappa
From: Tvrtko Ursulin 

Reserve some bits in the counter config namespace which will carry the
tile id and prepare the code to handle this.

No per tile counters have been added yet.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 153 +++-
 drivers/gpu/drm/i915/i915_pmu.h |   9 +-
 include/uapi/drm/i915_drm.h |  18 +++-
 3 files changed, 132 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index c00b94c7f509..5d1de98d86b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -56,11 +56,21 @@ static bool is_engine_config(u64 config)
return config < __I915_PMU_OTHER(0);
 }
 
+static unsigned int config_gt_id(const u64 config)
+{
+   return config >> __I915_PMU_GT_SHIFT;
+}
+
+static u64 config_counter(const u64 config)
+{
+   return config & ~(~0ULL << __I915_PMU_GT_SHIFT);
+}
+
 static unsigned int other_bit(const u64 config)
 {
unsigned int val;
 
-   switch (config) {
+   switch (config_counter(config)) {
case I915_PMU_ACTUAL_FREQUENCY:
val =  __I915_PMU_ACTUAL_FREQUENCY_ENABLED;
break;
@@ -78,15 +88,20 @@ static unsigned int other_bit(const u64 config)
return -1;
}
 
-   return I915_ENGINE_SAMPLE_COUNT + val;
+   return I915_ENGINE_SAMPLE_COUNT +
+  config_gt_id(config) * __I915_PMU_TRACKED_EVENT_COUNT +
+  val;
 }
 
 static unsigned int config_bit(const u64 config)
 {
-   if (is_engine_config(config))
+   if (is_engine_config(config)) {
+   GEM_BUG_ON(config_gt_id(config));
+
return engine_config_sample(config);
-   else
+   } else {
return other_bit(config);
+   }
 }
 
 static u64 config_mask(u64 config)
@@ -104,6 +119,18 @@ static unsigned int event_bit(struct perf_event *event)
return config_bit(event->attr.config);
 }
 
+static u64 frequency_enabled_mask(void)
+{
+   unsigned int i;
+   u64 mask = 0;
+
+   for (i = 0; i < I915_PMU_MAX_GTS; i++)
+   mask |= config_mask(__I915_PMU_ACTUAL_FREQUENCY(i)) |
+   config_mask(__I915_PMU_REQUESTED_FREQUENCY(i));
+
+   return mask;
+}
+
 static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active)
 {
struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu);
@@ -120,9 +147,7 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool 
gpu_active)
 * Mask out all the ones which do not need the timer, or in
 * other words keep all the ones that could need the timer.
 */
-   enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) |
- config_mask(I915_PMU_REQUESTED_FREQUENCY) |
- ENGINE_SAMPLE_MASK;
+   enable &= frequency_enabled_mask() | ENGINE_SAMPLE_MASK;
 
/*
 * When the GPU is idle per-engine counters do not need to be
@@ -164,9 +189,39 @@ static inline s64 ktime_since_raw(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
+static unsigned int
+__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+   unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
+
+   GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
+
+   return idx;
+}
+
+static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
+{
+   return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
+}
+
+static void
+store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
+{
+   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
+}
+
+static void
+add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val,
+   u32 mul)
+{
+   pmu->sample[__sample_idx(pmu, gt_id, sample)].cur +=
+   mul_u32_u32(val, mul);
+}
+
 static u64 get_rc6(struct intel_gt *gt)
 {
struct drm_i915_private *i915 = gt->i915;
+   const unsigned int gt_id = gt->info.id;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
bool awake = false;
@@ -181,7 +236,7 @@ static u64 get_rc6(struct intel_gt *gt)
spin_lock_irqsave(&pmu->lock, flags);
 
if (awake) {
-   pmu->sample[__I915_SAMPLE_RC6].cur = val;
+   store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
} else {
/*
 * We think we are runtime suspended.
@@ -190,14 +245,14 @@ static u64 get_rc6(struct intel_gt *gt)
 * on top of the last known real value, as the approximated RC6
 * counter value.
 */
-   val = ktime_since_raw(pmu->sleep_last);
-   val += pmu->sample[__I915_SAMPLE_RC6].cur;
+   val = ktime_since_raw(pmu->sleep_last[gt_id]);
+   val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
}
 
-