[PATCH v9 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-11-15 Thread sourab gupta
On Mon, 2016-11-07 at 11:49 -0800, Robert Bragg wrote:
> Gen graphics hardware can be set up to periodically write snapshots of
> performance counters into a circular buffer via its Observation
> Architecture and this patch exposes that capability to userspace via the
> i915 perf interface.
> 
> v2:
>    Make sure to initialize ->specific_ctx_id when opening, without
>    relying on _pin_notify hook, in case ctx already pinned.
> v3:
>    Revert back to pinning ctx upfront when opening stream, removing
>    need to hook in to pinning and to update OACONTROL on the fly.
> 
> Signed-off-by: Robert Bragg 
> Signed-off-by: Zhenyu Wang 
> Cc: Chris Wilson 
> Reviewed-by: Matthew Auld 
I have been working for quite some time on extending these interfaces
for the use case of multiple concurrent streams (on different engines),
and this infrastructure fits those use cases quite well. With Chris'
comments addressed, the patch can have my r-b.
Reviewed-by: Sourab Gupta 



[PATCH v9 06/11] drm/i915: Enable i915 perf stream for Haswell OA unit

2016-11-07 Thread Robert Bragg
Gen graphics hardware can be set up to periodically write snapshots of
performance counters into a circular buffer via its Observation
Architecture and this patch exposes that capability to userspace via the
i915 perf interface.

v2:
   Make sure to initialize ->specific_ctx_id when opening, without
   relying on _pin_notify hook, in case ctx already pinned.
v3:
   Revert back to pinning ctx upfront when opening stream, removing
   need to hook in to pinning and to update OACONTROL on the fly.

Signed-off-by: Robert Bragg 
Signed-off-by: Zhenyu Wang 
Cc: Chris Wilson 
Reviewed-by: Matthew Auld 
---
 drivers/gpu/drm/i915/i915_drv.h  |   66 ++-
 drivers/gpu/drm/i915/i915_perf.c | 1036 +-
 drivers/gpu/drm/i915/i915_reg.h  |  338 +
 include/uapi/drm/i915_drm.h  |   71 ++-
 4 files changed, 1482 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index bdebb66..8003120 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1785,6 +1785,11 @@ struct intel_wm_config {
bool sprites_scaled;
 };

+struct i915_oa_format {
+   u32 format;
+   int size;
+};
+
 struct i915_oa_reg {
i915_reg_t addr;
u32 value;
@@ -1805,11 +1810,6 @@ struct i915_perf_stream_ops {
 */
void (*disable)(struct i915_perf_stream *stream);

-   /* Return: true if any i915 perf records are ready to read()
-* for this stream.
-*/
-   bool (*can_read)(struct i915_perf_stream *stream);
-
/* Call poll_wait, passing a wait queue that will be woken
 * once there is something ready to read() for the stream
 */
@@ -1819,9 +1819,7 @@ struct i915_perf_stream_ops {

/* For handling a blocking read, wait until there is something
 * to ready to read() for the stream. E.g. wait on the same
-* wait queue that would be passed to poll_wait() until
-* ->can_read() returns true (if its safe to call ->can_read()
-* without the i915 perf lock held).
+* wait queue that would be passed to poll_wait().
 */
int (*wait_unlocked)(struct i915_perf_stream *stream);

@@ -1861,11 +1859,28 @@ struct i915_perf_stream {
struct list_head link;

u32 sample_flags;
+   int sample_size;

struct i915_gem_context *ctx;
bool enabled;

-   struct i915_perf_stream_ops *ops;
+   const struct i915_perf_stream_ops *ops;
+};
+
+struct i915_oa_ops {
+   void (*init_oa_buffer)(struct drm_i915_private *dev_priv);
+   int (*enable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*disable_metric_set)(struct drm_i915_private *dev_priv);
+   void (*oa_enable)(struct drm_i915_private *dev_priv);
+   void (*oa_disable)(struct drm_i915_private *dev_priv);
+   void (*update_oacontrol)(struct drm_i915_private *dev_priv);
+   void (*update_hw_ctx_id_locked)(struct drm_i915_private *dev_priv,
+   u32 ctx_id);
+   int (*read)(struct i915_perf_stream *stream,
+   char __user *buf,
+   size_t count,
+   size_t *offset);
+   bool (*oa_buffer_is_empty)(struct drm_i915_private *dev_priv);
 };

 struct drm_i915_private {
@@ -2171,16 +2186,47 @@ struct drm_i915_private {

struct {
bool initialized;
+
struct mutex lock;
struct list_head streams;

+   spinlock_t hook_lock;
+
struct {
-   u32 metrics_set;
+   struct i915_perf_stream *exclusive_stream;
+
+   u32 specific_ctx_id;
+   struct i915_vma *pinned_rcs_vma;
+
+   struct hrtimer poll_check_timer;
+   wait_queue_head_t poll_wq;
+   bool pollin;
+
+   bool periodic;
+   int period_exponent;
+   int timestamp_frequency;
+
+   int tail_margin;
+
+   int metrics_set;

const struct i915_oa_reg *mux_regs;
int mux_regs_len;
const struct i915_oa_reg *b_counter_regs;
int b_counter_regs_len;
+
+   struct {
+   struct i915_vma *vma;
+   u8 *vaddr;
+   int format;
+   int format_size;
+   } oa_buffer;
+
+   u32 gen7_latched_oastatus1;
+
+   struct i915_oa_ops ops;
+   const struct i915_oa_format *oa_formats;
+   int n_builtin_sets;
} oa;
} perf;

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index