Re: [Intel-gfx] [PATCH v3 5/7] drm/i915: add a new perf configuration execbuf parameter

2019-06-04 Thread Lionel Landwerlin

On 04/06/2019 16:40, Chris Wilson wrote:

Quoting Lionel Landwerlin (2019-06-04 14:11:38)

 list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
  }
  
+static int eb_oa_config(struct i915_execbuffer *eb)

+{
+   struct i915_vma *oa_vma;
+   int err;
+
+   if (!eb->oa_config)
+   return 0;
+
+   /*
+* If the config hasn't changed, skip reconfiguring the HW (this is
+* subject to a delay we want to avoid has much as possible).
+*/
+   if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+   return 0;

But you don't order the execution so it may not be the right oa_config.
Just add the barrier. It is virtually no cost for the exclusive oa
userspace.



Ah right sorry :(




How does this interact with the global oa_config being changed via the
ioctl?



eb_oa_config() should be called under the global lock right?
Or are you referring to something else?



  What significance is there for this per-execbuf oa_config being
applied to other users?



The expectation is that a single application is using this and data 
other users get from MI_REPORT_PERF_COUNT is undefined (much like when 
OA is turned off).


Now I see there is a problem with an application with multiple contexts 
because we have the Flex EU counter configurations per context.


I can break the config in 2 parts and execute the flex counter 
programming everything regardless.



We really want to minimize the NOA reprogramming because it takes an 
undefined amount of time to apply (been told below 1ms).



Thanks for spotting those issue.


-Lionel



-Chris



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Re: [Intel-gfx] [PATCH v3 5/7] drm/i915: add a new perf configuration execbuf parameter

2019-06-04 Thread Chris Wilson
Quoting Lionel Landwerlin (2019-06-04 14:11:38)
> list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
>  }
>  
> +static int eb_oa_config(struct i915_execbuffer *eb)
> +{
> +   struct i915_vma *oa_vma;
> +   int err;
> +
> +   if (!eb->oa_config)
> +   return 0;
> +
> +   /*
> +* If the config hasn't changed, skip reconfiguring the HW (this is
> +* subject to a delay we want to avoid has much as possible).
> +*/
> +   if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
> +   return 0;

But you don't order the execution so it may not be the right oa_config.
Just add the barrier. It is virtually no cost for the exclusive oa
userspace.

How does this interact with the global oa_config being changed via the
ioctl? What significance is there for this per-execbuf oa_config being
applied to other users?
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

[Intel-gfx] [PATCH v3 5/7] drm/i915: add a new perf configuration execbuf parameter

2019-06-04 Thread Lionel Landwerlin
We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.

v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
Move OA config vma to active before writing the ringbuffer (Chris)

Signed-off-by: Lionel Landwerlin 
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 110 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   7 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c   |   1 +
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c|   4 +-
 drivers/gpu/drm/i915/i915_drv.c   |   4 +
 drivers/gpu/drm/i915/i915_drv.h   |   1 +
 drivers/gpu/drm/i915/i915_perf.c  |  14 +--
 include/uapi/drm/i915_drm.h   |  37 ++
 8 files changed, 169 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 02dc5480e8fe..4a785999a9c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -280,7 +280,11 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
struct drm_i915_gem_exec_timeline_fences timeline_fences;
+   struct drm_i915_gem_execbuffer_perf_ext perf_config;
} extensions;
+
+   struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is 
not needed. */
+   struct drm_i915_gem_object *oa_bo;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1198,6 +1202,34 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+ s32 perf_fd, u64 oa_config_id,
+ struct i915_oa_config **out_oa_config,
+ struct drm_i915_gem_object **out_oa_obj)
+{
+   struct file *perf_file;
+   int ret;
+
+   if (!dev_priv->perf.oa.exclusive_stream)
+   return -EINVAL;
+
+   perf_file = fget(perf_fd);
+   if (!perf_file)
+   return -EINVAL;
+
+   if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
+   return -EINVAL;
+
+   fput(perf_file);
+
+   ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+ out_oa_config, out_oa_obj);
+
+   return ret;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
 struct i915_vma *vma,
 unsigned int len)
@@ -2060,6 +2092,51 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+   struct i915_vma *oa_vma;
+   int err;
+
+   if (!eb->oa_config)
+   return 0;
+
+   /*
+* If the config hasn't changed, skip reconfiguring the HW (this is
+* subject to a delay we want to avoid has much as possible).
+*/
+   if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+   return 0;
+
+   oa_vma = i915_vma_instance(eb->oa_bo,
+  &eb->engine->i915->ggtt.vm, NULL);
+   if (unlikely(IS_ERR(oa_vma)))
+   return PTR_ERR(oa_vma);
+
+   err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+   if (err)
+   return err;
+
+   err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+   if (err) {
+   i915_vma_unpin(oa_vma);
+   return err;
+   }
+
+   err = eb->engine->emit_bb_start(eb->request,
+   oa_vma->node.start,
+   0, I915_DISPATCH_SECURE);
+   if (err) {
+   i915_vma_unpin(oa_vma);
+   return err;
+   }
+
+   i915_vma_unpin(oa_vma);
+
+   swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+   return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
int err;
@@ -2086,6 +2163,10 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
 
+   err = eb_oa_config(eb);
+   if (err)
+   return err;
+
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
eb->batch_start_offset,
@@ -2519,6 +2600,13 @@ parse_execbuf2_extensions(struct 
drm_i915_gem_execbuffer2 *arg