Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries
On 17-07-18 15:36:05, Tvrtko Ursulin wrote: From: Chris Wilson The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. To be able to do so, we need to export the two symbols from kernel/events/core.c to register and unregister a PMU device. v2: Use a common timer for the ring sampling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 23 ++ drivers/gpu/drm/i915/i915_pmu.c | 452 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 41 +++ kernel/events/core.c| 1 + 7 files changed, 522 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_pmu.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f8227318dcaf..1c720013dc42 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -26,6 +26,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d310d8245dca..f18ce519f6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; i915_gem_shrinker_init(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c6fab08a2e6..de518503e033 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2093,6 +2094,12 @@ struct intel_cdclk_state { unsigned int cdclk, vco, ref; }; +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_NUM_PMU_SAMPLERS +}; + struct drm_i915_private { struct drm_device drm; @@ -2591,6 +2598,13 @@ struct drm_i915_private { int irq; } lpe_audio; + struct { + struct pmu base; + struct hrtimer timer; + u64 enable; + u64 sample[__I915_NUM_PMU_SAMPLERS]; + } pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); +/* i915_pmu.c */ +#ifdef CONFIG_PERF_EVENTS +extern void i915_pmu_register(struct drm_i915_private *i915); +extern void i915_pmu_unregister(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +#endif + /* i915_suspend.c */ extern int i915_save_state(struct drm_i915_private *dev_priv); extern int i915_restore_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index ..f03ddad44da6 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,452 @@ +#include +#include + +#include "i915_drv.h" +#include "intel_ringbuffer.h" + +#define FREQUENCY 200 +#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY) + +#define RING_MASK 0x +#define RING_MAX 32 + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & RING_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32 val; + + if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0) + continue; +
Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries
On 19/07/2017 10:53, Kamble, Sagar A wrote: Can we reuse calc_residency defined in i915_sysfs.c Looks like it, that is intel_pm.c/intel_rc6_residency_us. I will incorporate the change in the series or the patch. Thanks for spotting this! Regards, Tvrtko On 7/18/2017 8:06 PM, Tvrtko Ursulin wrote: From: Chris Wilson The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. To be able to do so, we need to export the two symbols from kernel/events/core.c to register and unregister a PMU device. v2: Use a common timer for the ring sampling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 23 ++ drivers/gpu/drm/i915/i915_pmu.c | 452 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 41 +++ kernel/events/core.c| 1 + 7 files changed, 522 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_pmu.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f8227318dcaf..1c720013dc42 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -26,6 +26,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d310d8245dca..f18ce519f6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; i915_gem_shrinker_init(dev_priv); +i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); +i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c6fab08a2e6..de518503e033 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2093,6 +2094,12 @@ struct intel_cdclk_state { unsigned int cdclk, vco, ref; }; +enum { +__I915_SAMPLE_FREQ_ACT = 0, +__I915_SAMPLE_FREQ_REQ, +__I915_NUM_PMU_SAMPLERS +}; + struct drm_i915_private { struct drm_device drm; @@ -2591,6 +2598,13 @@ struct drm_i915_private { intirq; } lpe_audio; +struct { +struct pmu base; +struct hrtimer timer; +u64 enable; +u64 sample[__I915_NUM_PMU_SAMPLERS]; +} pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); +/* i915_pmu.c */ +#ifdef CONFIG_PERF_EVENTS +extern void i915_pmu_register(struct drm_i915_private *i915); +extern void i915_pmu_unregister(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +#endif + /* i915_suspend.c */ extern int i915_save_state(struct drm_i915_private *dev_priv); extern int i915_restore_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index ..f03ddad44da6 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,452 @@ +#include +#include + +#include "i915_drv.h" +#include "intel_ringbuffer.h" + +#define FREQUENCY 200 +#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY) + +#define RING_MASK 0x +#define RING_MAX 32 + +static void engines_sample(struct drm_i915_private *dev_priv) +{ +struct intel_engine_cs *engine; +enum intel_engine_id id; +bool fw = false; + +if ((dev_priv->pmu.enable & RING_MASK) == 0) +return; + +if (!dev_priv->gt.awake) +return; + +if (!intel_runtime_pm_get_if_in_use(dev_priv
Re: [Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries
Can we reuse calc_residency defined in i915_sysfs.c On 7/18/2017 8:06 PM, Tvrtko Ursulin wrote: From: Chris Wilson The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. To be able to do so, we need to export the two symbols from kernel/events/core.c to register and unregister a PMU device. v2: Use a common timer for the ring sampling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 23 ++ drivers/gpu/drm/i915/i915_pmu.c | 452 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 41 +++ kernel/events/core.c| 1 + 7 files changed, 522 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_pmu.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f8227318dcaf..1c720013dc42 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -26,6 +26,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d310d8245dca..f18ce519f6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; i915_gem_shrinker_init(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c6fab08a2e6..de518503e033 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2093,6 +2094,12 @@ struct intel_cdclk_state { unsigned int cdclk, vco, ref; }; +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_NUM_PMU_SAMPLERS +}; + struct drm_i915_private { struct drm_device drm; @@ -2591,6 +2598,13 @@ struct drm_i915_private { int irq; } lpe_audio; + struct { + struct pmu base; + struct hrtimer timer; + u64 enable; + u64 sample[__I915_NUM_PMU_SAMPLERS]; + } pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); +/* i915_pmu.c */ +#ifdef CONFIG_PERF_EVENTS +extern void i915_pmu_register(struct drm_i915_private *i915); +extern void i915_pmu_unregister(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +#endif + /* i915_suspend.c */ extern int i915_save_state(struct drm_i915_private *dev_priv); extern int i915_restore_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index ..f03ddad44da6 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,452 @@ +#include +#include + +#include "i915_drv.h" +#include "intel_ringbuffer.h" + +#define FREQUENCY 200 +#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY) + +#define RING_MASK 0x +#define RING_MAX 32 + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & RING_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32
[Intel-gfx] [RFC 01/14] RFC drm/i915: Expose a PMU interface for perf queries
From: Chris Wilson The first goal is to be able to measure GPU (and invidual ring) busyness without having to poll registers from userspace. (Which not only incurs holding the forcewake lock indefinitely, perturbing the system, but also runs the risk of hanging the machine.) As an alternative we can use the perf event counter interface to sample the ring registers periodically and send those results to userspace. To be able to do so, we need to export the two symbols from kernel/events/core.c to register and unregister a PMU device. v2: Use a common timer for the ring sampling. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/Makefile | 1 + drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 23 ++ drivers/gpu/drm/i915/i915_pmu.c | 452 drivers/gpu/drm/i915/intel_ringbuffer.h | 2 + include/uapi/drm/i915_drm.h | 41 +++ kernel/events/core.c| 1 + 7 files changed, 522 insertions(+) create mode 100644 drivers/gpu/drm/i915/i915_pmu.c diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index f8227318dcaf..1c720013dc42 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -26,6 +26,7 @@ i915-y := i915_drv.o \ i915-$(CONFIG_COMPAT) += i915_ioc32.o i915-$(CONFIG_DEBUG_FS) += i915_debugfs.o intel_pipe_crc.o +i915-$(CONFIG_PERF_EVENTS) += i915_pmu.o # GEM code i915-y += i915_cmd_parser.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index d310d8245dca..f18ce519f6a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1194,6 +1194,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv) struct drm_device *dev = &dev_priv->drm; i915_gem_shrinker_init(dev_priv); + i915_pmu_register(dev_priv); /* * Notify a valid surface after modesetting, @@ -1247,6 +1248,7 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) intel_opregion_unregister(dev_priv); i915_perf_unregister(dev_priv); + i915_pmu_unregister(dev_priv); i915_teardown_sysfs(dev_priv); i915_guc_log_unregister(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 7c6fab08a2e6..de518503e033 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -2093,6 +2094,12 @@ struct intel_cdclk_state { unsigned int cdclk, vco, ref; }; +enum { + __I915_SAMPLE_FREQ_ACT = 0, + __I915_SAMPLE_FREQ_REQ, + __I915_NUM_PMU_SAMPLERS +}; + struct drm_i915_private { struct drm_device drm; @@ -2591,6 +2598,13 @@ struct drm_i915_private { int irq; } lpe_audio; + struct { + struct pmu base; + struct hrtimer timer; + u64 enable; + u64 sample[__I915_NUM_PMU_SAMPLERS]; + } pmu; + /* * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch * will be rejected. Instead look for a better place. @@ -3760,6 +3774,15 @@ extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); +/* i915_pmu.c */ +#ifdef CONFIG_PERF_EVENTS +extern void i915_pmu_register(struct drm_i915_private *i915); +extern void i915_pmu_unregister(struct drm_i915_private *i915); +#else +static inline void i915_pmu_register(struct drm_i915_private *i915) {} +static inline void i915_pmu_unregister(struct drm_i915_private *i915) {} +#endif + /* i915_suspend.c */ extern int i915_save_state(struct drm_i915_private *dev_priv); extern int i915_restore_state(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c new file mode 100644 index ..f03ddad44da6 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -0,0 +1,452 @@ +#include +#include + +#include "i915_drv.h" +#include "intel_ringbuffer.h" + +#define FREQUENCY 200 +#define PERIOD max_t(u64, 1, NSEC_PER_SEC / FREQUENCY) + +#define RING_MASK 0x +#define RING_MAX 32 + +static void engines_sample(struct drm_i915_private *dev_priv) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + bool fw = false; + + if ((dev_priv->pmu.enable & RING_MASK) == 0) + return; + + if (!dev_priv->gt.awake) + return; + + if (!intel_runtime_pm_get_if_in_use(dev_priv)) + return; + + for_each_engine(engine, dev_priv, id) { + u32 val; + + if ((dev_priv->pmu.enable & (0x7 << (4*id))) == 0) + continue; + +