Re: [Intel-gfx] [RFC 5/8] drm/i915: Add support for forwarding ring id in sample metadata through perf

2015-08-05 Thread Chris Wilson
On Wed, Aug 05, 2015 at 11:25:41AM +0530, sourab.gu...@intel.com wrote:
> @@ -542,18 +548,27 @@ static void forward_one_gen_pmu_sample(struct 
> drm_i915_private *dev_priv,
>   struct perf_sample_data data;
>   struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
>   int snapshot_size;
> - u8 *snapshot;
> + u8 *snapshot, *current_ptr;
>   struct drm_i915_ts_node_ctx_id *ctx_info;
> + struct drm_i915_ts_node_ring_id *ring_info;
>   struct perf_raw_record raw;
>  
> - BUILD_BUG_ON(TS_DATA_SIZE != 8);
> - BUILD_BUG_ON(CTX_INFO_SIZE != 8);
> + BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
> + (RING_INFO_SIZE != 8));
>  
>   snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
>   snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
>  
>   ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + TS_DATA_SIZE);
>   ctx_info->ctx_id = node->ctx_id;
> + current_ptr = snapshot + snapshot_size;
> +
> + if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING) {
> + ring_info = (struct drm_i915_ts_node_ring_id *)current_ptr;
> + ring_info->ring = node->ring;

Stylewise I would be more familiar with current_ptr = ring_info + 1, and
make current_ptr void*. snapshot_size is then redundant.

> + snapshot_size += RING_INFO_SIZE;
> + current_ptr = snapshot + snapshot_size;
> + }

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC 5/8] drm/i915: Add support for forwarding ring id in sample metadata through perf

2015-08-04 Thread sourab . gupta
From: Sourab Gupta 

This patch introduces flags and adds support for having ring id output with
the timestamp samples and forwarding them through perf.

When the userspace expresses its interest in listening to the ring id
through a gen pmu attr field during event init, the samples generated would
have an additional field appended with the ring id information. This patch
enables this framework, which can be expanded upon to introduce further
fields in the gen pmu attr through which additional metadata information
can be appended to samples.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 98 +++--
 include/uapi/drm/i915_drm.h | 13 +
 3 files changed, 111 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 46ece85..70f1bd6 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1683,6 +1683,7 @@ struct i915_gen_pmu_node {
u32 offset;
bool discard;
u32 ctx_id;
+   u32 ring;
 };
 
 extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -2016,6 +2017,8 @@ struct drm_i915_private {
struct list_head node_list;
struct work_struct forward_work;
struct work_struct event_destroy_work;
+#define I915_GEN_PMU_SAMPLE_RING   (1<<0)
+   int sample_info_flags;
} gen_pmu;
 
void (*emit_profiling_data[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c 
b/drivers/gpu/drm/i915/i915_oa_perf.c
index 2cf7f1b..41e2407 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -13,6 +13,7 @@
 
 #define TS_DATA_SIZE sizeof(struct drm_i915_ts_data)
 #define CTX_INFO_SIZE sizeof(struct drm_i915_ts_node_ctx_id)
+#define RING_INFO_SIZE sizeof(struct drm_i915_ts_node_ring_id)
 
 static u32 i915_oa_event_paranoid = true;
 
@@ -113,6 +114,9 @@ static void i915_oa_emit_perf_report(struct 
drm_i915_gem_request *req,
i915_vma_move_to_active(dev_priv->oa_pmu.oa_rcs_buffer.vma, ring);
 }
 
+/* Returns the ring's ID mask (i.e. I915_EXEC_<ring>) */
+#define ring_id_mask(ring) ((ring)->id + 1)
+
 /*
  * Emits the commands to capture timestamps, into the CS
  */
@@ -139,6 +143,8 @@ static void i915_gen_emit_ts_data(struct 
drm_i915_gem_request *req,
}
 
entry->ctx_id = global_ctx_id;
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
+   entry->ring = ring_id_mask(ring);
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
 
spin_lock(&dev_priv->gen_pmu.lock);
@@ -542,18 +548,27 @@ static void forward_one_gen_pmu_sample(struct 
drm_i915_private *dev_priv,
struct perf_sample_data data;
struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
int snapshot_size;
-   u8 *snapshot;
+   u8 *snapshot, *current_ptr;
struct drm_i915_ts_node_ctx_id *ctx_info;
+   struct drm_i915_ts_node_ring_id *ring_info;
struct perf_raw_record raw;
 
-   BUILD_BUG_ON(TS_DATA_SIZE != 8);
-   BUILD_BUG_ON(CTX_INFO_SIZE != 8);
+   BUILD_BUG_ON((TS_DATA_SIZE != 8) || (CTX_INFO_SIZE != 8) ||
+   (RING_INFO_SIZE != 8));
 
snapshot = dev_priv->gen_pmu.buffer.addr + node->offset;
snapshot_size = TS_DATA_SIZE + CTX_INFO_SIZE;
 
ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + TS_DATA_SIZE);
ctx_info->ctx_id = node->ctx_id;
+   current_ptr = snapshot + snapshot_size;
+
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING) {
+   ring_info = (struct drm_i915_ts_node_ring_id *)current_ptr;
+   ring_info->ring = node->ring;
+   snapshot_size += RING_INFO_SIZE;
+   current_ptr = snapshot + snapshot_size;
+   }
 
/* Note: the raw sample consists of a u32 size member and raw data. The
 * combined size of these two fields is required to be 8 byte aligned.
@@ -999,6 +1014,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
 
node_size = TS_DATA_SIZE + CTX_INFO_SIZE;
 
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
+   node_size += RING_INFO_SIZE;
+
/* size has to be aligned to 8 bytes */
node_size = ALIGN(node_size, 8);
dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1533,16 +1551,90 @@ static int i915_oa_event_event_idx(struct perf_event 
*event)
return 0;
 }
 
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+struct drm_i915_gen_pmu_attr *attr)
+{
+   u32 size;
+   int ret;
+
+   if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+   return -EFAULT;
+
+   /*
+* zero the full structure, so that a short copy

[Intel-gfx] [RFC 5/8] drm/i915: Add support for forwarding ring id in sample metadata through perf

2015-07-15 Thread sourab . gupta
From: Sourab Gupta 

This patch introduces flags and adds support for having ring id output with
the timestamp samples and forwarding them through perf.

When the userspace expresses its interest in listening to the ring id
through a gen pmu attr field during event init, the samples generated would
have an additional field appended with the ring id information. This patch
enables this framework, which can be expanded upon to introduce further
fields in the gen pmu attr through which additional metadata information
can be appended to samples.

Signed-off-by: Sourab Gupta 
---
 drivers/gpu/drm/i915/i915_drv.h |  3 ++
 drivers/gpu/drm/i915/i915_oa_perf.c | 90 -
 include/uapi/drm/i915_drm.h | 13 ++
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 59d23d0..cf0528e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1682,6 +1682,7 @@ struct i915_gen_pmu_node {
u32 offset;
bool discard;
u32 ctx_id;
+   u32 ring;
 };
 
 extern const struct i915_oa_reg i915_oa_3d_mux_config_hsw[];
@@ -2011,6 +2012,8 @@ struct drm_i915_private {
struct list_head node_list;
struct work_struct work_timer;
struct work_struct work_event_destroy;
+#define I915_GEN_PMU_SAMPLE_RING   (1<<0)
+   int sample_info_flags;
} gen_pmu;
 
void (*insert_profile_cmd[I915_PROFILE_MAX])
diff --git a/drivers/gpu/drm/i915/i915_oa_perf.c 
b/drivers/gpu/drm/i915/i915_oa_perf.c
index 1780de42..5915720 100644
--- a/drivers/gpu/drm/i915/i915_oa_perf.c
+++ b/drivers/gpu/drm/i915/i915_oa_perf.c
@@ -102,6 +102,9 @@ void i915_oa_insert_cmd(struct intel_ringbuffer *ringbuf, 
u32 ctx_id, int tag)
i915_vma_move_to_active(i915_gem_obj_to_ggtt(obj), ring);
 }
 
+/* Returns the ring's ID mask (i.e. I915_EXEC_<ring>) */
+#define ring_id_mask(ring) ((ring)->id + 1)
+
 void i915_gen_insert_cmd_ts(struct intel_ringbuffer *ringbuf, u32 ctx_id,
int tag)
 {
@@ -119,6 +122,8 @@ void i915_gen_insert_cmd_ts(struct intel_ringbuffer 
*ringbuf, u32 ctx_id,
return;
}
entry->ctx_id = ctx_id;
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
+   entry->ring = ring_id_mask(ring);
i915_gem_request_assign(&entry->req, ring->outstanding_lazy_request);
 
spin_lock_irqsave(&dev_priv->gen_pmu.lock, lock_flags);
@@ -548,8 +553,9 @@ static void forward_one_gen_pmu_sample(struct 
drm_i915_private *dev_priv,
struct perf_sample_data data;
struct perf_event *event = dev_priv->gen_pmu.exclusive_event;
int ts_size, snapshot_size;
-   u8 *snapshot;
+   u8 *snapshot, *current_ptr;
struct drm_i915_ts_node_ctx_id *ctx_info;
+   struct drm_i915_ts_node_ring_id *ring_info;
struct perf_raw_record raw;
 
ts_size = sizeof(struct drm_i915_ts_data);
@@ -558,6 +564,14 @@ static void forward_one_gen_pmu_sample(struct 
drm_i915_private *dev_priv,
 
ctx_info = (struct drm_i915_ts_node_ctx_id *)(snapshot + ts_size);
ctx_info->ctx_id = node->ctx_id;
+   current_ptr = snapshot + snapshot_size;
+
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING) {
+   ring_info = (struct drm_i915_ts_node_ring_id *)current_ptr;
+   ring_info->ring = node->ring;
+   snapshot_size += sizeof(*ring_info);
+   current_ptr = snapshot + snapshot_size;
+   }
 
perf_sample_data_init(&data, 0, event->hw.last_period);
 
@@ -1010,6 +1024,9 @@ static int init_gen_pmu_buffer(struct perf_event *event)
node_size = sizeof(struct drm_i915_ts_data) +
sizeof(struct drm_i915_ts_node_ctx_id);
 
+   if (dev_priv->gen_pmu.sample_info_flags & I915_GEN_PMU_SAMPLE_RING)
+   node_size += sizeof(struct drm_i915_ts_node_ring_id);
+
/* size has to be aligned to 8 bytes (required by relevant gpu cmds) */
node_size = ALIGN(node_size, 8);
dev_priv->gen_pmu.buffer.node_size = node_size;
@@ -1544,16 +1561,87 @@ static int i915_oa_event_event_idx(struct perf_event 
*event)
return 0;
 }
 
+static int i915_gen_pmu_copy_attr(struct drm_i915_gen_pmu_attr __user *uattr,
+struct drm_i915_gen_pmu_attr *attr)
+{
+   u32 size;
+   int ret;
+
+   if (!access_ok(VERIFY_WRITE, uattr, I915_GEN_PMU_ATTR_SIZE_VER0))
+   return -EFAULT;
+
+   /*
+* zero the full structure, so that a short copy will be nice.
+*/
+   memset(attr, 0, sizeof(*attr));
+
+   ret = get_user(size, &uattr->size);
+   if (ret)
+   return ret;
+
+   if (size > PAGE_SIZE)   /* silly large */
+   goto err_size;
+
+   if (size < I915_GEN_PMU_ATTR_SIZE_VER0)
+