[Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-06-07 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

v4:
 * Refactored for timer period accounting.

v5:
 * Avoid 64-bit division. (Chris Wilson)

v6:
 * Do fewer divisions by accumulating in qd.ns units. (Chris Wilson)
 * Change counter scale to avoid multiplication in readout and increase
   counter headroom.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 58 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h |  9 +++-
 3 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b8c6953867ee..f8a819600ebc 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,7 +15,8 @@
 #define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
 BIT(I915_SAMPLE_WAIT) | \
-BIT(I915_SAMPLE_SEMA))
+BIT(I915_SAMPLE_SEMA) | \
+BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -161,6 +162,12 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
 static void
 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -204,6 +211,11 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
if (val & RING_WAIT_SEMAPHORE)
add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
   period_ns);
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+   
atomic_read(&engine->request_stats.queued),
+   period_ns);
}
 
if (fw)
@@ -212,12 +224,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
intel_runtime_pm_put(dev_priv);
 }
 
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
-   sample->cur += mul_u32_u32(val, mul);
-}
-
 static void
 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -323,6 +329,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+   case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -540,6 +547,15 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
+
+   if (sample == I915_SAMPLE_QUEUED) {
+   BUILD_BUG_ON(NSEC_PER_SEC %
+I915_SAMPLE_QUEUED_DIVISOR);
+   /* to qd */
+   val = div_u64(val,
+ NSEC_PER_SEC /
+ I915_SAMPLE_QUEUED_DIVISOR);
+   }
}
} else {
switch (event->attr.config) {
@@ -796,6 +812,16 @@ static const struct attribute_group 
*i915_pmu_attr_groups[] = {
 { \
.sample = (__sample), \
.name = (__name), \
+   .suffix = "unit", \
+   .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+   .sample = (__sample), \
+   .name = (__name), \
+   .suffix = "scale", \
+   .value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -823,6 +849,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
return ++attr;
 }
 
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.001
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -839,10 +868,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+   char *suffix;
+   char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+   __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+ 

[Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-06-06 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

v4:
 * Refactored for timer period accounting.

v5:
 * Avoid 64-bit division. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 54 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h |  9 -
 3 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b8c6953867ee..ba2205d92190 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,7 +15,8 @@
 #define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
 BIT(I915_SAMPLE_WAIT) | \
-BIT(I915_SAMPLE_SEMA))
+BIT(I915_SAMPLE_SEMA) | \
+BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -161,6 +162,12 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
 static void
 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -204,6 +211,13 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
if (val & RING_WAIT_SEMAPHORE)
add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
   period_ns);
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+   
atomic_read(&engine->request_stats.queued),
+   div_u64((u64)period_ns *
+   I915_SAMPLE_QUEUED_DIVISOR,
+   100));
}
 
if (fw)
@@ -212,12 +226,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
intel_runtime_pm_put(dev_priv);
 }
 
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
-   sample->cur += mul_u32_u32(val, mul);
-}
-
 static void
 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -323,6 +331,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+   case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -540,6 +549,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
+
+   if (sample == I915_SAMPLE_QUEUED)
+   val = div_u64(val, MSEC_PER_SEC);  /* to qd */
}
} else {
switch (event->attr.config) {
@@ -796,6 +808,16 @@ static const struct attribute_group 
*i915_pmu_attr_groups[] = {
 { \
.sample = (__sample), \
.name = (__name), \
+   .suffix = "unit", \
+   .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+   .sample = (__sample), \
+   .name = (__name), \
+   .suffix = "scale", \
+   .value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -823,6 +845,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
return ++attr;
 }
 
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -839,10 +864,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+   char *suffix;
+   char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+   __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+__stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -852,6 +881,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
 
+   BUILD_BUG

Re: [Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-06-06 Thread Tvrtko Ursulin


On 06/06/2018 14:16, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-06-06 13:48:45)

@@ -204,6 +211,12 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
 if (val & RING_WAIT_SEMAPHORE)
 add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
period_ns);
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+   
atomic_read(&engine->request_stats.queued),
+   (u64)period_ns *
+   I915_SAMPLE_QUEUED_DIVISOR / 100);


Doesn't this promote to a 64b divide?


Yes my bad. Will need to use div_u64 and resend the three musketeers..

Regards,

Tvrtko

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-06-06 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-06-06 13:48:45)
> @@ -204,6 +211,12 @@ engines_sample(struct drm_i915_private *dev_priv, 
> unsigned int period_ns)
> if (val & RING_WAIT_SEMAPHORE)
> add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
>period_ns);
> +
> +   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
> +   
> add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
> +   
> atomic_read(&engine->request_stats.queued),
> +   (u64)period_ns *
> +   I915_SAMPLE_QUEUED_DIVISOR / 100);

Doesn't this promote to a 64b divide?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-06-06 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

v4:
 * Refactored for timer period accounting.

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 53 -
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h |  9 -
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b8c6953867ee..5f8cc3fe1826 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,7 +15,8 @@
 #define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
 BIT(I915_SAMPLE_WAIT) | \
-BIT(I915_SAMPLE_SEMA))
+BIT(I915_SAMPLE_SEMA) | \
+BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -161,6 +162,12 @@ add_sample(struct i915_pmu_sample *sample, u32 val)
sample->cur += val;
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+   sample->cur += mul_u32_u32(val, mul);
+}
+
 static void
 engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -204,6 +211,12 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
if (val & RING_WAIT_SEMAPHORE)
add_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
   period_ns);
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+   
atomic_read(&engine->request_stats.queued),
+   (u64)period_ns *
+   I915_SAMPLE_QUEUED_DIVISOR / 100);
}
 
if (fw)
@@ -212,12 +225,6 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned 
int period_ns)
intel_runtime_pm_put(dev_priv);
 }
 
-static void
-add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
-{
-   sample->cur += mul_u32_u32(val, mul);
-}
-
 static void
 frequency_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
 {
@@ -323,6 +330,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+   case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -540,6 +548,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
val = ktime_to_ns(intel_engine_get_busy_time(engine));
} else {
val = engine->pmu.sample[sample].cur;
+
+   if (sample == I915_SAMPLE_QUEUED)
+   val = div_u64(val, MSEC_PER_SEC);  /* to qd */
}
} else {
switch (event->attr.config) {
@@ -796,6 +807,16 @@ static const struct attribute_group 
*i915_pmu_attr_groups[] = {
 { \
.sample = (__sample), \
.name = (__name), \
+   .suffix = "unit", \
+   .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+   .sample = (__sample), \
+   .name = (__name), \
+   .suffix = "scale", \
+   .value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -823,6 +844,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
return ++attr;
 }
 
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -839,10 +863,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+   char *suffix;
+   char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+   __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+__stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -852,6 +880,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
 
+   BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+(1 / I915_SAMPLE_QUEUED_SCALE));
+
/* Count

Re: [Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-04-06 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-04-05 13:39:20)
> From: Tvrtko Ursulin 
> 
> We add a PMU counter to expose the number of requests which have been
> submitted from userspace but are not yet runnable due dependencies and
> unsignaled fences.
> 
> This is useful to analyze the overall load of the system.
> 
> v2:
>  * Rebase for name change and re-order.
>  * Drop floating point constant. (Chris Wilson)
> 
> v3:
>  * Change scale to 1024 for faster arithmetics. (Chris Wilson)
> 
> Signed-off-by: Tvrtko Ursulin 

I have nothing to complain about,
Reviewed-by: Chris Wilson 
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-04-05 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 40 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h |  9 +++-
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index eb60943671b3..07f5cac97b56 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,7 +15,8 @@
 #define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
 BIT(I915_SAMPLE_WAIT) | \
-BIT(I915_SAMPLE_SEMA))
+BIT(I915_SAMPLE_SEMA) | \
+BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -199,6 +200,11 @@ static void engines_sample(struct drm_i915_private 
*dev_priv)
 
update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
  PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ I915_SAMPLE_QUEUED_DIVISOR,
+ 
atomic_read(&engine->request_stats.queued));
}
 
if (fw)
@@ -296,6 +302,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+   case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -497,6 +504,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else {
val = engine->pmu.sample[sample].cur;
}
+
+   if (sample == I915_SAMPLE_QUEUED)
+   val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -752,6 +762,16 @@ static const struct attribute_group 
*i915_pmu_attr_groups[] = {
 { \
.sample = (__sample), \
.name = (__name), \
+   .suffix = "unit", \
+   .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+   .sample = (__sample), \
+   .name = (__name), \
+   .suffix = "scale", \
+   .value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -779,6 +799,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
return ++attr;
 }
 
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -795,10 +818,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+   char *suffix;
+   char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+   __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+__stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -808,6 +835,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
 
+   BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+(1 / I915_SAMPLE_QUEUED_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
@@ -885,13 +915,15 @@ create_event_attributes(struct drm_i915_private *i915)

engine->instance,

engine_events[i].sample));
 
-   str = kasprintf(GFP_KERNEL, "%s-%s.unit",
-   engine->name, engine_events[i].name);
+   str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+   engine->name, engine_events[i].name,
+   engine_events[i].suffix);
if (!str)
goto err;
 
*attr_iter++ = &pmu_iter->attr.attr;
- 

[Intel-gfx] [PATCH 4/7] drm/i915/pmu: Add queued counter

2018-03-19 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

We add a PMU counter to expose the number of requests which have been
submitted from userspace but are not yet runnable due to dependencies and
unsignaled fences.

This is useful to analyze the overall load of the system.

v2:
 * Rebase for name change and re-order.
 * Drop floating point constant. (Chris Wilson)

v3:
 * Change scale to 1024 for faster arithmetic. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin 
---
 drivers/gpu/drm/i915/i915_pmu.c | 40 +
 drivers/gpu/drm/i915/intel_ringbuffer.h |  2 +-
 include/uapi/drm/i915_drm.h |  9 +++-
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index eb60943671b3..07f5cac97b56 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -15,7 +15,8 @@
 #define ENGINE_SAMPLE_MASK \
(BIT(I915_SAMPLE_BUSY) | \
 BIT(I915_SAMPLE_WAIT) | \
-BIT(I915_SAMPLE_SEMA))
+BIT(I915_SAMPLE_SEMA) | \
+BIT(I915_SAMPLE_QUEUED))
 
 #define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
 
@@ -199,6 +200,11 @@ static void engines_sample(struct drm_i915_private 
*dev_priv)
 
update_sample(&engine->pmu.sample[I915_SAMPLE_SEMA],
  PERIOD, !!(val & RING_WAIT_SEMAPHORE));
+
+   if (engine->pmu.enable & BIT(I915_SAMPLE_QUEUED))
+   update_sample(&engine->pmu.sample[I915_SAMPLE_QUEUED],
+ I915_SAMPLE_QUEUED_DIVISOR,
+ 
atomic_read(&engine->request_stats.queued));
}
 
if (fw)
@@ -296,6 +302,7 @@ engine_event_status(struct intel_engine_cs *engine,
switch (sample) {
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
+   case I915_SAMPLE_QUEUED:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -497,6 +504,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else {
val = engine->pmu.sample[sample].cur;
}
+
+   if (sample == I915_SAMPLE_QUEUED)
+   val = div_u64(val, FREQUENCY);
} else {
switch (event->attr.config) {
case I915_PMU_ACTUAL_FREQUENCY:
@@ -752,6 +762,16 @@ static const struct attribute_group 
*i915_pmu_attr_groups[] = {
 { \
.sample = (__sample), \
.name = (__name), \
+   .suffix = "unit", \
+   .value = "ns", \
+}
+
+#define __engine_event_scale(__sample, __name, __scale) \
+{ \
+   .sample = (__sample), \
+   .name = (__name), \
+   .suffix = "scale", \
+   .value = (__scale), \
 }
 
 static struct i915_ext_attribute *
@@ -779,6 +799,9 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char 
*name,
return ++attr;
 }
 
+/* No brackets or quotes below please. */
+#define I915_SAMPLE_QUEUED_SCALE 0.0009765625
+
 static struct attribute **
 create_event_attributes(struct drm_i915_private *i915)
 {
@@ -795,10 +818,14 @@ create_event_attributes(struct drm_i915_private *i915)
static const struct {
enum drm_i915_pmu_engine_sample sample;
char *name;
+   char *suffix;
+   char *value;
} engine_events[] = {
__engine_event(I915_SAMPLE_BUSY, "busy"),
__engine_event(I915_SAMPLE_SEMA, "sema"),
__engine_event(I915_SAMPLE_WAIT, "wait"),
+   __engine_event_scale(I915_SAMPLE_QUEUED, "queued",
+__stringify(I915_SAMPLE_QUEUED_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -808,6 +835,9 @@ create_event_attributes(struct drm_i915_private *i915)
enum intel_engine_id id;
unsigned int i;
 
+   BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+(1 / I915_SAMPLE_QUEUED_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
@@ -885,13 +915,15 @@ create_event_attributes(struct drm_i915_private *i915)

engine->instance,

engine_events[i].sample));
 
-   str = kasprintf(GFP_KERNEL, "%s-%s.unit",
-   engine->name, engine_events[i].name);
+   str = kasprintf(GFP_KERNEL, "%s-%s.%s",
+   engine->name, engine_events[i].name,
+   engine_events[i].suffix);
if (!str)
goto err;
 
*attr_iter++ = &pmu_iter->attr.attr;
-