We want the ability to dispatch a set of command buffer to the
hardware, each with a different OA configuration. To achieve this, we
reuse a couple of fields from the execbuf2 struct (I CAN HAZ
execbuf3?) to notify what OA configuration should be used for a batch
buffer. This requires the process making the execbuf with this flag to
also own the perf fd at the time of execbuf.
v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris)
Move oa_config vma to active (Chris)
v3: Don't drop the lock for engine lookup (Chris)
Move OA config vma to active before writing the ringbuffer (Chris)
Signed-off-by: Lionel Landwerlin
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c| 110 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 7 ++
drivers/gpu/drm/i915/gt/intel_lrc.c | 1 +
drivers/gpu/drm/i915/gt/intel_ringbuffer.c| 4 +-
drivers/gpu/drm/i915/i915_drv.c | 4 +
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_perf.c | 14 +--
include/uapi/drm/i915_drm.h | 37 ++
8 files changed, 169 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 02dc5480e8fe..4a785999a9c5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -280,7 +280,11 @@ struct i915_execbuffer {
struct {
u64 flags; /** Available extensions parameters */
struct drm_i915_gem_exec_timeline_fences timeline_fences;
+ struct drm_i915_gem_execbuffer_perf_ext perf_config;
} extensions;
+
+ struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is
not needed. */
+ struct drm_i915_gem_object *oa_bo;
};
#define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1198,6 +1202,34 @@ static int reloc_move_to_gpu(struct i915_request *rq,
struct i915_vma *vma)
return err;
}
+
+static int
+get_execbuf_oa_config(struct drm_i915_private *dev_priv,
+ s32 perf_fd, u64 oa_config_id,
+ struct i915_oa_config **out_oa_config,
+ struct drm_i915_gem_object **out_oa_obj)
+{
+ struct file *perf_file;
+ int ret;
+
+ if (!dev_priv->perf.oa.exclusive_stream)
+ return -EINVAL;
+
+ perf_file = fget(perf_fd);
+ if (!perf_file)
+ return -EINVAL;
+
+ if (perf_file->private_data != dev_priv->perf.oa.exclusive_stream)
+ return -EINVAL;
+
+ fput(perf_file);
+
+ ret = i915_perf_get_oa_config(dev_priv, oa_config_id,
+ out_oa_config, out_oa_obj);
+
+ return ret;
+}
+
static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
struct i915_vma *vma,
unsigned int len)
@@ -2060,6 +2092,51 @@ add_to_client(struct i915_request *rq, struct drm_file
*file)
list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list);
}
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+ struct i915_vma *oa_vma;
+ int err;
+
+ if (!eb->oa_config)
+ return 0;
+
+ /*
+* If the config hasn't changed, skip reconfiguring the HW (this is
+* subject to a delay we want to avoid has much as possible).
+*/
+ if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config)
+ return 0;
+
+ oa_vma = i915_vma_instance(eb->oa_bo,
+ &eb->engine->i915->ggtt.vm, NULL);
+ if (unlikely(IS_ERR(oa_vma)))
+ return PTR_ERR(oa_vma);
+
+ err = i915_vma_pin(oa_vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ return err;
+
+ err = i915_vma_move_to_active(oa_vma, eb->request, 0);
+ if (err) {
+ i915_vma_unpin(oa_vma);
+ return err;
+ }
+
+ err = eb->engine->emit_bb_start(eb->request,
+ oa_vma->node.start,
+ 0, I915_DISPATCH_SECURE);
+ if (err) {
+ i915_vma_unpin(oa_vma);
+ return err;
+ }
+
+ i915_vma_unpin(oa_vma);
+
+ swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config);
+
+ return 0;
+}
+
static int eb_submit(struct i915_execbuffer *eb)
{
int err;
@@ -2086,6 +2163,10 @@ static int eb_submit(struct i915_execbuffer *eb)
return err;
}
+ err = eb_oa_config(eb);
+ if (err)
+ return err;
+
err = eb->engine->emit_bb_start(eb->request,
eb->batch->node.start +
eb->batch_start_offset,
@@ -2519,6 +2600,13 @@ parse_execbuf2_extensions(struct
drm_i915_gem_execbuffer2 *arg