Re: [Intel-gfx] [RFC 04/11] drm/i915/pmu: Expose a PMU interface for perf queries

2017-09-14 Thread Chris Wilson
Quoting Tvrtko Ursulin (2017-09-11 16:25:52)
> +   case I915_PMU_RC6_RESIDENCY:
> +   val = intel_rc6_residency_ns(i915,
> +IS_VALLEYVIEW(i915) ?
> +VLV_GT_RENDER_RC6 :
> +GEN6_GT_GFX_RC6);
> +   break;
> +   case I915_PMU_RC6p_RESIDENCY:
> +   if (!IS_VALLEYVIEW(i915))

HAS_RC6p and HAS_RC6pp?
> +   val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6p);
> +   break;
> +   case I915_PMU_RC6pp_RESIDENCY:
> +   if (!IS_VALLEYVIEW(i915))
> +   val = intel_rc6_residency_ns(i915, GEN6_GT_GFX_RC6pp);
> +   break;
> +   }
> +
> +   return val;
> +}
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC 04/11] drm/i915/pmu: Expose a PMU interface for perf queries

2017-09-12 Thread Tvrtko Ursulin


On 12/09/2017 03:06, Rogozhkin, Dmitry V wrote:

On Mon, 2017-09-11 at 16:25 +0100, Tvrtko Ursulin wrote:

From: Tvrtko Ursulin 

From: Chris Wilson 
From: Tvrtko Ursulin 
From: Dmitry Rogozhkin 

The first goal is to be able to measure GPU (and invidual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v1-v2 (Chris Wilson):

v2: Use a common timer for the ring sampling.

v3: (Tvrtko Ursulin)
  * Decouple uAPI from i915 engine ids.
  * Complete uAPI defines.
  * Refactor some code to helpers for clarity.
  * Skip sampling disabled engines.
  * Expose counters in sysfs.
  * Pass in fake regs to avoid null ptr deref in perf core.
  * Convert to class/instance uAPI.
  * Use shared driver code for rc6 residency, power and frequency.

v4: (Dmitry Rogozhkin)
  * Register PMU with .task_ctx_nr=perf_invalid_context
  * Expose cpumask for the PMU with the single CPU in the mask
  * Properly support pmu->stop(): it should call pmu->read()
  * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
  * Make pmu.busy_stats a refcounter to avoid busy stats going away
with some deleted event.


busy_stats appear later in the patch series. And in your final version
busy_stats remain bool while we rely on events refcounting. So, this
item is misleading. Could you, please, change it giving a credit to
general ref-counting of events which you have rewrote for v5 of this
patch?


Oh so I dropped the reference to "drm/i915/pmu: introduce refcounting of 
event subscriptions". My apologies. I will restore it in the next 
version. It wasn't deliberate, just an omission while squashing and 
re-ordering things.


Regards,

Tvrtko


  * Expose cpumask for i915 PMU to avoid multiple events creation of
the same type followed by counter aggregation by perf-stat.
  * Track CPUs getting online/offline to migrate perf context. If (likely)
cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
needed to see effect of CPU status tracking.
  * End result is that only global events are supported and perf stat
works correctly.
  * Deny perf driver level sampling - it is prohibited for uncore PMU.

v5: (Tvrtko Ursulin)

  * Don't hardcode number of engine samplers.
  * Rewrite event ref-counting for correctness and simplicity.
  * Store initial counter value when starting already enabled events
to correctly report values to all listeners.
  * Fix RC6 residency readout.
  * Comments, GPL header.

Signed-off-by: Chris Wilson 
Signed-off-by: Tvrtko Ursulin 
Signed-off-by: Dmitry Rogozhkin 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Dmitry Rogozhkin 
Cc: Peter Zijlstra 
---
  drivers/gpu/drm/i915/Makefile   |   1 +
  drivers/gpu/drm/i915/i915_drv.c |   2 +
  drivers/gpu/drm/i915/i915_drv.h |  76 
  drivers/gpu/drm/i915/i915_pmu.c | 686 
  drivers/gpu/drm/i915/i915_reg.h |   3 +
  drivers/gpu/drm/i915/intel_engine_cs.c  |  10 +
  drivers/gpu/drm/i915/intel_ringbuffer.c |  25 ++
  drivers/gpu/drm/i915/intel_ringbuffer.h |  25 ++
  include/uapi/drm/i915_drm.h |  58 +++
  9 files changed, 886 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1cb8059a3a16..7b3a0eca62b6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
  
  i915-$(CONFIG_COMPAT)   += i915_ioc32.o

  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
  
  # GEM code

  i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c111ea96e80..b1f96eb1be16 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1196,6 +1196,7 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
struct drm_device *dev = _priv->drm;
  
  	i915_gem_shrinker_init(dev_priv);

+   i915_pmu_register(dev_priv);
  
  	/*

 * Notify a valid surface after modesetting,
@@ -1250,6 +1251,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
intel_opregion_unregister(dev_priv);
  
  	i915_perf_unregister(dev_priv);


Re: [Intel-gfx] [RFC 04/11] drm/i915/pmu: Expose a PMU interface for perf queries

2017-09-11 Thread Rogozhkin, Dmitry V
On Mon, 2017-09-11 at 16:25 +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin 
> 
> From: Chris Wilson 
> From: Tvrtko Ursulin 
> From: Dmitry Rogozhkin 
> 
> The first goal is to be able to measure GPU (and invidual ring) busyness
> without having to poll registers from userspace. (Which not only incurs
> holding the forcewake lock indefinitely, perturbing the system, but also
> runs the risk of hanging the machine.) As an alternative we can use the
> perf event counter interface to sample the ring registers periodically
> and send those results to userspace.
> 
> To be able to do so, we need to export the two symbols from
> kernel/events/core.c to register and unregister a PMU device.
> 
> v1-v2 (Chris Wilson):
> 
> v2: Use a common timer for the ring sampling.
> 
> v3: (Tvrtko Ursulin)
>  * Decouple uAPI from i915 engine ids.
>  * Complete uAPI defines.
>  * Refactor some code to helpers for clarity.
>  * Skip sampling disabled engines.
>  * Expose counters in sysfs.
>  * Pass in fake regs to avoid null ptr deref in perf core.
>  * Convert to class/instance uAPI.
>  * Use shared driver code for rc6 residency, power and frequency.
> 
> v4: (Dmitry Rogozhkin)
>  * Register PMU with .task_ctx_nr=perf_invalid_context
>  * Expose cpumask for the PMU with the single CPU in the mask
>  * Properly support pmu->stop(): it should call pmu->read()
>  * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
>  * Make pmu.busy_stats a refcounter to avoid busy stats going away
>with some deleted event.

busy_stats appear later in the patch series. And in your final version
busy_stats remain bool while we rely on events refcounting. So, this
item is misleading. Could you, please, change it giving a credit to
general ref-counting of events which you have rewrote for v5 of this
patch?

>  * Expose cpumask for i915 PMU to avoid multiple events creation of
>the same type followed by counter aggregation by perf-stat.
>  * Track CPUs getting online/offline to migrate perf context. If (likely)
>cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
>needed to see effect of CPU status tracking.
>  * End result is that only global events are supported and perf stat
>works correctly.
>  * Deny perf driver level sampling - it is prohibited for uncore PMU.
> 
> v5: (Tvrtko Ursulin)
> 
>  * Don't hardcode number of engine samplers.
>  * Rewrite event ref-counting for correctness and simplicity.
>  * Store initial counter value when starting already enabled events
>to correctly report values to all listeners.
>  * Fix RC6 residency readout.
>  * Comments, GPL header.
> 
> Signed-off-by: Chris Wilson 
> Signed-off-by: Tvrtko Ursulin 
> Signed-off-by: Dmitry Rogozhkin 
> Cc: Tvrtko Ursulin 
> Cc: Chris Wilson 
> Cc: Dmitry Rogozhkin 
> Cc: Peter Zijlstra 
> ---
>  drivers/gpu/drm/i915/Makefile   |   1 +
>  drivers/gpu/drm/i915/i915_drv.c |   2 +
>  drivers/gpu/drm/i915/i915_drv.h |  76 
>  drivers/gpu/drm/i915/i915_pmu.c | 686 
> 
>  drivers/gpu/drm/i915/i915_reg.h |   3 +
>  drivers/gpu/drm/i915/intel_engine_cs.c  |  10 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |  25 ++
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  25 ++
>  include/uapi/drm/i915_drm.h |  58 +++
>  9 files changed, 886 insertions(+)
>  create mode 100644 drivers/gpu/drm/i915/i915_pmu.c
> 
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 1cb8059a3a16..7b3a0eca62b6 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -26,6 +26,7 @@ i915-y := i915_drv.o \
>  
>  i915-$(CONFIG_COMPAT)   += i915_ioc32.o
>  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
> +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
>  
>  # GEM code
>  i915-y += i915_cmd_parser.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 5c111ea96e80..b1f96eb1be16 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1196,6 +1196,7 @@ static void i915_driver_register(struct 
> drm_i915_private *dev_priv)
>   struct drm_device *dev = _priv->drm;
>  
>   i915_gem_shrinker_init(dev_priv);
> + i915_pmu_register(dev_priv);
>  
>   /*
>* Notify a valid surface after modesetting,
> @@ -1250,6 +1251,7 @@ static void i915_driver_unregister(struct 
> drm_i915_private *dev_priv)
>   intel_opregion_unregister(dev_priv);
>  
>   i915_perf_unregister(dev_priv);
> + i915_pmu_unregister(dev_priv);
>  
>   i915_teardown_sysfs(dev_priv);
>   i915_guc_log_unregister(dev_priv);
> diff --git 

[Intel-gfx] [RFC 04/11] drm/i915/pmu: Expose a PMU interface for perf queries

2017-09-11 Thread Tvrtko Ursulin
From: Tvrtko Ursulin 

From: Chris Wilson 
From: Tvrtko Ursulin 
From: Dmitry Rogozhkin 

The first goal is to be able to measure GPU (and invidual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v1-v2 (Chris Wilson):

v2: Use a common timer for the ring sampling.

v3: (Tvrtko Ursulin)
 * Decouple uAPI from i915 engine ids.
 * Complete uAPI defines.
 * Refactor some code to helpers for clarity.
 * Skip sampling disabled engines.
 * Expose counters in sysfs.
 * Pass in fake regs to avoid null ptr deref in perf core.
 * Convert to class/instance uAPI.
 * Use shared driver code for rc6 residency, power and frequency.

v4: (Dmitry Rogozhkin)
 * Register PMU with .task_ctx_nr=perf_invalid_context
 * Expose cpumask for the PMU with the single CPU in the mask
 * Properly support pmu->stop(): it should call pmu->read()
 * Properly support pmu->del(): it should call stop(event, PERF_EF_UPDATE)
 * Make pmu.busy_stats a refcounter to avoid busy stats going away
   with some deleted event.
 * Expose cpumask for i915 PMU to avoid multiple events creation of
   the same type followed by counter aggregation by perf-stat.
 * Track CPUs getting online/offline to migrate perf context. If (likely)
   cpumask will initially set CPU0, CONFIG_BOOTPARAM_HOTPLUG_CPU0 will be
   needed to see effect of CPU status tracking.
 * End result is that only global events are supported and perf stat
   works correctly.
 * Deny perf driver level sampling - it is prohibited for uncore PMU.

v5: (Tvrtko Ursulin)

 * Don't hardcode number of engine samplers.
 * Rewrite event ref-counting for correctness and simplicity.
 * Store initial counter value when starting already enabled events
   to correctly report values to all listeners.
 * Fix RC6 residency readout.
 * Comments, GPL header.

Signed-off-by: Chris Wilson 
Signed-off-by: Tvrtko Ursulin 
Signed-off-by: Dmitry Rogozhkin 
Cc: Tvrtko Ursulin 
Cc: Chris Wilson 
Cc: Dmitry Rogozhkin 
Cc: Peter Zijlstra 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/i915_drv.c |   2 +
 drivers/gpu/drm/i915/i915_drv.h |  76 
 drivers/gpu/drm/i915/i915_pmu.c | 686 
 drivers/gpu/drm/i915/i915_reg.h |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c  |  10 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |  25 ++
 drivers/gpu/drm/i915/intel_ringbuffer.h |  25 ++
 include/uapi/drm/i915_drm.h |  58 +++
 9 files changed, 886 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 1cb8059a3a16..7b3a0eca62b6 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 5c111ea96e80..b1f96eb1be16 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1196,6 +1196,7 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
struct drm_device *dev = _priv->drm;
 
i915_gem_shrinker_init(dev_priv);
+   i915_pmu_register(dev_priv);
 
/*
 * Notify a valid surface after modesetting,
@@ -1250,6 +1251,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
intel_opregion_unregister(dev_priv);
 
i915_perf_unregister(dev_priv);
+   i915_pmu_unregister(dev_priv);
 
i915_teardown_sysfs(dev_priv);
i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 48daf9552163..62646b8dfb7a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2190,6 +2191,69 @@ struct intel_cdclk_state {
unsigned int cdclk, vco, ref;
 };
 
+enum {
+   __I915_SAMPLE_FREQ_ACT = 0,
+   __I915_SAMPLE_FREQ_REQ,
+   __I915_NUM_PMU_SAMPLERS
+};
+
+/**
+ * How many different events we track in the global PMU