Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

2017-07-24 Thread Ben Widawsky

On 17-07-18 15:36:05, Tvrtko Ursulin wrote:

From: Chris Wilson 

The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v2: Use a common timer for the ring sampling.

Signed-off-by: Chris Wilson 
---
drivers/gpu/drm/i915/Makefile   |   1 +
drivers/gpu/drm/i915/i915_drv.c |   2 +
drivers/gpu/drm/i915/i915_drv.h |  23 ++
drivers/gpu/drm/i915/i915_pmu.c | 452 
drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
include/uapi/drm/i915_drm.h |  41 +++
kernel/events/core.c|   1 +
7 files changed, 522 insertions(+)
create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..1c720013dc42 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \

i915-$(CONFIG_COMPAT)   += i915_ioc32.o
i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o

# GEM code
i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d310d8245dca..f18ce519f6a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
struct drm_device *dev = &dev_priv->drm;

i915_gem_shrinker_init(dev_priv);
+   i915_pmu_register(dev_priv);

/*
 * Notify a valid surface after modesetting,
@@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
intel_opregion_unregister(dev_priv);

i915_perf_unregister(dev_priv);
+   i915_pmu_unregister(dev_priv);

i915_teardown_sysfs(dev_priv);
i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c6fab08a2e6..de518503e033 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
#include 
#include 
#include 
+#include 
#include 
#include 
#include 
@@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
unsigned int cdclk, vco, ref;
};

+enum {
+   __I915_SAMPLE_FREQ_ACT = 0,
+   __I915_SAMPLE_FREQ_REQ,
+   __I915_NUM_PMU_SAMPLERS
+};
+
struct drm_i915_private {
struct drm_device drm;

@@ -2591,6 +2598,13 @@ struct drm_i915_private {
int irq;
} lpe_audio;

+   struct {
+   struct pmu base;
+   struct hrtimer timer;
+   u64 enable;
+   u64 sample[__I915_NUM_PMU_SAMPLERS];
+   } pmu;
+
/*
 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 * will be rejected. Instead look for a better place.
@@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private 
*dev_priv);
extern void i915_perf_register(struct drm_i915_private *dev_priv);
extern void i915_perf_unregister(struct drm_i915_private *dev_priv);

+/* i915_pmu.c */
+#ifdef CONFIG_PERF_EVENTS
+extern void i915_pmu_register(struct drm_i915_private *i915);
+extern void i915_pmu_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
/* i915_suspend.c */
extern int i915_save_state(struct drm_i915_private *dev_priv);
extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
new file mode 100644
index ..f03ddad44da6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,452 @@
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY)
+
+#define RING_MASK 0x
+#define RING_MAX 32
+
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   bool fw = false;
+
+   if ((dev_priv->pmu.enable & RING_MASK) == 0)
+   return;
+
+   if (!dev_priv->gt.awake)
+   return;
+
+   if (!intel_runtime_pm_get_if_in_use(dev_priv))
+   return;
+
+   for_each_engine(engine, dev_priv, id) {
+   u32 val;
+
+   if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
+   continue;
+

Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

2017-07-20 Thread Tvrtko Ursulin


On 19/07/2017 10:53, Kamble, Sagar A wrote:

Can we reuse calc_residency defined in i915_sysfs.c?


Looks like it, that is intel_pm.c/intel_rc6_residency_us.
I will incorporate the change in the series or the patch. Thanks for 
spotting this!


Regards,

Tvrtko



On 7/18/2017 8:06 PM, Tvrtko Ursulin wrote:

From: Chris Wilson 

The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v2: Use a common timer for the ring sampling.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/Makefile   |   1 +
  drivers/gpu/drm/i915/i915_drv.c |   2 +
  drivers/gpu/drm/i915/i915_drv.h |  23 ++
  drivers/gpu/drm/i915/i915_pmu.c | 452 


  drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
  include/uapi/drm/i915_drm.h |  41 +++
  kernel/events/core.c|   1 +
  7 files changed, 522 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile 
b/drivers/gpu/drm/i915/Makefile

index f8227318dcaf..1c720013dc42 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
  i915-$(CONFIG_COMPAT)   += i915_ioc32.o
  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
  # GEM code
  i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c 
b/drivers/gpu/drm/i915/i915_drv.c

index d310d8245dca..f18ce519f6a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct 
drm_i915_private *dev_priv)

  struct drm_device *dev = &dev_priv->drm;
  i915_gem_shrinker_init(dev_priv);
+i915_pmu_register(dev_priv);
  /*
   * Notify a valid surface after modesetting,
@@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)

  intel_opregion_unregister(dev_priv);
  i915_perf_unregister(dev_priv);
+i915_pmu_unregister(dev_priv);
  i915_teardown_sysfs(dev_priv);
  i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h 
b/drivers/gpu/drm/i915/i915_drv.h

index 7c6fab08a2e6..de518503e033 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
  unsigned int cdclk, vco, ref;
  };
+enum {
+__I915_SAMPLE_FREQ_ACT = 0,
+__I915_SAMPLE_FREQ_REQ,
+__I915_NUM_PMU_SAMPLERS
+};
+
  struct drm_i915_private {
  struct drm_device drm;
@@ -2591,6 +2598,13 @@ struct drm_i915_private {
  intirq;
  } lpe_audio;
+struct {
+struct pmu base;
+struct hrtimer timer;
+u64 enable;
+u64 sample[__I915_NUM_PMU_SAMPLERS];
+} pmu;
+
  /*
   * NOTE: This is the dri1/ums dungeon, don't add stuff here. 
Your patch

   * will be rejected. Instead look for a better place.
@@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct 
drm_i915_private *dev_priv);

  extern void i915_perf_register(struct drm_i915_private *dev_priv);
  extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
+/* i915_pmu.c */
+#ifdef CONFIG_PERF_EVENTS
+extern void i915_pmu_register(struct drm_i915_private *i915);
+extern void i915_pmu_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
  /* i915_suspend.c */
  extern int i915_save_state(struct drm_i915_private *dev_priv);
  extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c 
b/drivers/gpu/drm/i915/i915_pmu.c

new file mode 100644
index ..f03ddad44da6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,452 @@
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY)
+
+#define RING_MASK 0x
+#define RING_MAX 32
+
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+struct intel_engine_cs *engine;
+enum intel_engine_id id;
+bool fw = false;
+
+if ((dev_priv->pmu.enable & RING_MASK) == 0)
+return;
+
+if (!dev_priv->gt.awake)
+return;
+
+if (!intel_runtime_pm_get_if_in_use(dev_priv

Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

2017-07-19 Thread Kamble, Sagar A

Can we reuse calc_residency defined in i915_sysfs.c?


On 7/18/2017 8:06 PM, Tvrtko Ursulin wrote:

From: Chris Wilson 

The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v2: Use a common timer for the ring sampling.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/Makefile   |   1 +
  drivers/gpu/drm/i915/i915_drv.c |   2 +
  drivers/gpu/drm/i915/i915_drv.h |  23 ++
  drivers/gpu/drm/i915/i915_pmu.c | 452 
  drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
  include/uapi/drm/i915_drm.h |  41 +++
  kernel/events/core.c|   1 +
  7 files changed, 522 insertions(+)
  create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..1c720013dc42 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
  
  i915-$(CONFIG_COMPAT)   += i915_ioc32.o

  i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
  
  # GEM code

  i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d310d8245dca..f18ce519f6a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
struct drm_device *dev = &dev_priv->drm;
  
  	i915_gem_shrinker_init(dev_priv);

+   i915_pmu_register(dev_priv);
  
  	/*

 * Notify a valid surface after modesetting,
@@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
intel_opregion_unregister(dev_priv);
  
  	i915_perf_unregister(dev_priv);

+   i915_pmu_unregister(dev_priv);
  
  	i915_teardown_sysfs(dev_priv);

i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c6fab08a2e6..de518503e033 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
unsigned int cdclk, vco, ref;
  };
  
+enum {

+   __I915_SAMPLE_FREQ_ACT = 0,
+   __I915_SAMPLE_FREQ_REQ,
+   __I915_NUM_PMU_SAMPLERS
+};
+
  struct drm_i915_private {
struct drm_device drm;
  
@@ -2591,6 +2598,13 @@ struct drm_i915_private {

int irq;
} lpe_audio;
  
+	struct {

+   struct pmu base;
+   struct hrtimer timer;
+   u64 enable;
+   u64 sample[__I915_NUM_PMU_SAMPLERS];
+   } pmu;
+
/*
 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 * will be rejected. Instead look for a better place.
@@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private 
*dev_priv);
  extern void i915_perf_register(struct drm_i915_private *dev_priv);
  extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
  
+/* i915_pmu.c */

+#ifdef CONFIG_PERF_EVENTS
+extern void i915_pmu_register(struct drm_i915_private *i915);
+extern void i915_pmu_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
  /* i915_suspend.c */
  extern int i915_save_state(struct drm_i915_private *dev_priv);
  extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
new file mode 100644
index ..f03ddad44da6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,452 @@
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY)
+
+#define RING_MASK 0x
+#define RING_MAX 32
+
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   bool fw = false;
+
+   if ((dev_priv->pmu.enable & RING_MASK) == 0)
+   return;
+
+   if (!dev_priv->gt.awake)
+   return;
+
+   if (!intel_runtime_pm_get_if_in_use(dev_priv))
+   return;
+
+   for_each_engine(engine, dev_priv, id) {
+   u32

[Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries

2017-07-18 Thread Tvrtko Ursulin
From: Chris Wilson 

The first goal is to be able to measure GPU (and individual ring) busyness
without having to poll registers from userspace. (Which not only incurs
holding the forcewake lock indefinitely, perturbing the system, but also
runs the risk of hanging the machine.) As an alternative we can use the
perf event counter interface to sample the ring registers periodically
and send those results to userspace.

To be able to do so, we need to export the two symbols from
kernel/events/core.c to register and unregister a PMU device.

v2: Use a common timer for the ring sampling.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/Makefile   |   1 +
 drivers/gpu/drm/i915/i915_drv.c |   2 +
 drivers/gpu/drm/i915/i915_drv.h |  23 ++
 drivers/gpu/drm/i915/i915_pmu.c | 452 
 drivers/gpu/drm/i915/intel_ringbuffer.h |   2 +
 include/uapi/drm/i915_drm.h |  41 +++
 kernel/events/core.c|   1 +
 7 files changed, 522 insertions(+)
 create mode 100644 drivers/gpu/drm/i915/i915_pmu.c

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f8227318dcaf..1c720013dc42 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -26,6 +26,7 @@ i915-y := i915_drv.o \
 
 i915-$(CONFIG_COMPAT)   += i915_ioc32.o
 i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o
+i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o
 
 # GEM code
 i915-y += i915_cmd_parser.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d310d8245dca..f18ce519f6a2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private 
*dev_priv)
struct drm_device *dev = &dev_priv->drm;
 
i915_gem_shrinker_init(dev_priv);
+   i915_pmu_register(dev_priv);
 
/*
 * Notify a valid surface after modesetting,
@@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct 
drm_i915_private *dev_priv)
intel_opregion_unregister(dev_priv);
 
i915_perf_unregister(dev_priv);
+   i915_pmu_unregister(dev_priv);
 
i915_teardown_sysfs(dev_priv);
i915_guc_log_unregister(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7c6fab08a2e6..de518503e033 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2093,6 +2094,12 @@ struct intel_cdclk_state {
unsigned int cdclk, vco, ref;
 };
 
+enum {
+   __I915_SAMPLE_FREQ_ACT = 0,
+   __I915_SAMPLE_FREQ_REQ,
+   __I915_NUM_PMU_SAMPLERS
+};
+
 struct drm_i915_private {
struct drm_device drm;
 
@@ -2591,6 +2598,13 @@ struct drm_i915_private {
int irq;
} lpe_audio;
 
+   struct {
+   struct pmu base;
+   struct hrtimer timer;
+   u64 enable;
+   u64 sample[__I915_NUM_PMU_SAMPLERS];
+   } pmu;
+
/*
 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 * will be rejected. Instead look for a better place.
@@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private 
*dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
 
+/* i915_pmu.c */
+#ifdef CONFIG_PERF_EVENTS
+extern void i915_pmu_register(struct drm_i915_private *i915);
+extern void i915_pmu_unregister(struct drm_i915_private *i915);
+#else
+static inline void i915_pmu_register(struct drm_i915_private *i915) {}
+static inline void i915_pmu_unregister(struct drm_i915_private *i915) {}
+#endif
+
 /* i915_suspend.c */
 extern int i915_save_state(struct drm_i915_private *dev_priv);
 extern int i915_restore_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
new file mode 100644
index ..f03ddad44da6
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -0,0 +1,452 @@
+#include 
+#include 
+
+#include "i915_drv.h"
+#include "intel_ringbuffer.h"
+
+#define FREQUENCY 200
+#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY)
+
+#define RING_MASK 0x
+#define RING_MAX 32
+
+static void engines_sample(struct drm_i915_private *dev_priv)
+{
+   struct intel_engine_cs *engine;
+   enum intel_engine_id id;
+   bool fw = false;
+
+   if ((dev_priv->pmu.enable & RING_MASK) == 0)
+   return;
+
+   if (!dev_priv->gt.awake)
+   return;
+
+   if (!intel_runtime_pm_get_if_in_use(dev_priv))
+   return;
+
+   for_each_engine(engine, dev_priv, id) {
+   u32 val;
+
+   if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0)
+   continue;
+
+