We want the ability to dispatch a set of command buffers to the
hardware, each with a different OA configuration. To achieve this, we
add an extension to the execbuf2 ioctl (I CAN HAZ execbuf3?) to
indicate which OA configuration should be used for a batch buffer.
This requires the process making the execbuf with this extension to
also own the perf fd at the time of execbuf.

v2: Add an emit_oa_config() vfunc in the intel_engine_cs (Chris)
    Move oa_config vma to active (Chris)

v3: Don't drop the lock for engine lookup (Chris)
    Move OA config vma to active before writing the ringbuffer (Chris)

v4: Reuse i915_user_extension_fn
    Serialize requests with OA config updates

v5: Check that the chained extension is only present once (Chris)
    Unpin oa_vma in main path (Chris)

v6: Use BIT_ULL (Chris)

v7: Hold drm.struct_mutex when serializing the request with OA config (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
---
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c    | 133 +++++++++++++++++-
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   4 +
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |   9 ++
 drivers/gpu/drm/i915/gt/intel_lrc.c           |   4 +-
 drivers/gpu/drm/i915/gt/intel_ringbuffer.c    |   4 +-
 drivers/gpu/drm/i915/i915_getparam.c          |   4 +
 drivers/gpu/drm/i915/i915_perf.c              |   2 -
 include/uapi/drm/i915_drm.h                   |  39 +++++
 8 files changed, 194 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 46ad8d9642d1..bdecd893cd61 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -24,6 +24,7 @@
 #include "i915_gem_clflush.h"
 #include "i915_gem_context.h"
 #include "i915_gem_ioctls.h"
+#include "i915_perf.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -284,7 +285,12 @@ struct i915_execbuffer {
        struct {
                u64 flags; /** Available extensions parameters */
                struct drm_i915_gem_execbuffer_ext_timeline_fences 
timeline_fences;
+               struct drm_i915_gem_execbuffer_ext_perf perf_config;
        } extensions;
+
+       struct i915_oa_config *oa_config; /** HW configuration for OA, NULL if 
not needed. */
+       struct drm_i915_gem_object *oa_bo;
+       struct i915_vma *oa_vma;
 };
 
 #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags])
@@ -1152,6 +1158,42 @@ static int reloc_move_to_gpu(struct i915_request *rq, 
struct i915_vma *vma)
        return err;
 }
 
+
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+       struct file *perf_file;
+       int err = 0;
+
+       eb->oa_config = NULL;
+       eb->oa_vma = NULL;
+       eb->oa_bo = NULL;
+
+       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) 
== 0)
+               return 0;
+
+       perf_file = fget(eb->extensions.perf_config.perf_fd);
+       if (!perf_file) {
+               err = -EINVAL;
+               goto out;
+       }
+
+       if (perf_file->private_data != eb->i915->perf.exclusive_stream)
+               err = -EINVAL;
+
+       if (!err) {
+               err = i915_perf_get_oa_config_and_bo(
+                       eb->i915->perf.exclusive_stream,
+                       eb->extensions.perf_config.oa_config,
+                       &eb->oa_config, &eb->oa_bo);
+       }
+
+       fput(perf_file);
+
+out:
+       return err;
+}
+
 static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                             struct i915_vma *vma,
                             unsigned int len)
@@ -2051,6 +2093,42 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
        spin_unlock(&file_priv->mm.lock);
 }
 
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+       int ret;
+
+       if (!eb->oa_config)
+               return 0;
+
+       lockdep_assert_held(&eb->i915->drm.struct_mutex); /* oa_config */
+
+       ret = i915_active_request_set(&eb->engine->last_oa_config,
+                                     eb->request);
+       if (ret)
+               return ret;
+
+       /*
+        * If the config hasn't changed, skip reconfiguring the HW (this is
+        * subject to a delay we want to avoid as much as possible).
+        */
+       if (eb->oa_config == eb->i915->perf.exclusive_stream->oa_config)
+               return 0;
+
+       ret = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
+       if (ret)
+               return ret;
+
+       ret = eb->engine->emit_bb_start(eb->request,
+                                       eb->oa_vma->node.start,
+                                       0, I915_DISPATCH_SECURE);
+       if (ret)
+               return ret;
+
+       swap(eb->oa_config, eb->i915->perf.exclusive_stream->oa_config);
+
+       return 0;
+}
+
 static int eb_submit(struct i915_execbuffer *eb)
 {
        int err;
@@ -2077,6 +2155,10 @@ static int eb_submit(struct i915_execbuffer *eb)
                        return err;
        }
 
+       err = eb_oa_config(eb);
+       if (err)
+               return err;
+
        err = eb->engine->emit_bb_start(eb->request,
                                        eb->batch->node.start +
                                        eb->batch_start_offset,
@@ -2643,8 +2725,25 @@ static int parse_timeline_fences(struct 
i915_user_extension __user *ext, void *d
        return 0;
 }
 
+static int parse_perf_config(struct i915_user_extension __user *ext, void 
*data)
+{
+       struct i915_execbuffer *eb = data;
+
+       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+               return -EINVAL;
+
+       if (copy_from_user(&eb->extensions.perf_config, ext,
+                          sizeof(eb->extensions.perf_config)))
+               return -EFAULT;
+
+       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+       return 0;
+}
+
 static const i915_user_extension_fn execbuf_extensions[] = {
         [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
 };
 
 static int
@@ -2755,10 +2854,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                }
        }
 
-       err = eb_create(&eb);
+       err = get_execbuf_oa_config(&eb);
        if (err)
                goto err_out_fence;
 
+       err = eb_create(&eb);
+       if (err)
+               goto err_oa_config;
+
        GEM_BUG_ON(!eb.lut_size);
 
        err = eb_select_context(&eb);
@@ -2769,6 +2872,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        if (unlikely(err))
                goto err_context;
 
+       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+               if (!intel_engine_has_oa(eb.engine)) {
+                       err = -ENODEV;
+                       goto err_engine;
+               }
+       }
+
        err = i915_mutex_lock_interruptible(dev);
        if (err)
                goto err_engine;
@@ -2889,6 +2999,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                }
        }
 
+       if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+               eb.oa_vma = i915_vma_instance(eb.oa_bo,
+                                             &eb.engine->i915->ggtt.vm, NULL);
+               if (unlikely(IS_ERR(eb.oa_vma))) {
+                       err = PTR_ERR(eb.oa_vma);
+                       eb.oa_vma = NULL;
+                       goto err_request;
+               }
+
+               err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL);
+               if (err)
+                       goto err_request;
+       }
+
        /*
         * Whilst this request exists, batch_obj will be on the
         * active_list, and so will hold the active reference. Only when this
@@ -2935,6 +3059,13 @@ i915_gem_do_execbuffer(struct drm_device *dev,
        i915_gem_context_put(eb.gem_context);
 err_destroy:
        eb_destroy(&eb);
+err_oa_config:
+       if (eb.oa_config) {
+               i915_gem_object_put(eb.oa_bo);
+               i915_oa_config_put(eb.oa_config);
+       }
+       if (eb.oa_vma)
+               i915_vma_unpin(eb.oa_vma);
 err_out_fence:
        if (out_fence_fd != -1)
                put_unused_fd(out_fence_fd);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c 
b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 17006d50b63f..f65375a26532 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -786,6 +786,10 @@ int intel_engine_init_common(struct intel_engine_cs 
*engine)
 
        engine->emit_fini_breadcrumb_dw = ret;
 
+
+       INIT_ACTIVE_REQUEST(&engine->last_oa_config,
+                           &engine->i915->drm.struct_mutex);
+
        return 0;
 
 err_unpin:
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h 
b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 15e02cb58a67..c62bdb464a06 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -399,6 +399,8 @@ struct intel_engine_cs {
        struct i915_wa_list wa_list;
        struct i915_wa_list whitelist;
 
+       struct i915_active_request last_oa_config;
+
        u32             irq_keep_mask; /* always keep these interrupts */
        u32             irq_enable_mask; /* bitmask to enable ring interrupt */
        void            (*irq_enable)(struct intel_engine_cs *engine);
@@ -481,6 +483,7 @@ struct intel_engine_cs {
 #define I915_ENGINE_HAS_SEMAPHORES   BIT(3)
 #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4)
 #define I915_ENGINE_IS_VIRTUAL       BIT(5)
+#define I915_ENGINE_HAS_OA           BIT(6)
        unsigned int flags;
 
        /*
@@ -576,6 +579,12 @@ intel_engine_is_virtual(const struct intel_engine_cs 
*engine)
        return engine->flags & I915_ENGINE_IS_VIRTUAL;
 }
 
+static inline bool
+intel_engine_has_oa(const struct intel_engine_cs *engine)
+{
+       return engine->flags & I915_ENGINE_HAS_OA;
+}
+
 #define instdone_has_slice(dev_priv___, sseu___, slice___) \
        ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
 
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c 
b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a141e9e37bf7..709b08f973c5 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -3082,8 +3082,10 @@ int intel_execlists_submission_setup(struct 
intel_engine_cs *engine)
        logical_ring_default_vfuncs(engine);
        logical_ring_default_irqs(engine);
 
-       if (engine->class == RENDER_CLASS)
+       if (engine->class == RENDER_CLASS) {
                rcs_submission_override(engine);
+               engine->flags |= I915_ENGINE_HAS_OA;
+       }
 
        return 0;
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
index 601c16239fdf..6b06f64ffa23 100644
--- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c
@@ -2250,8 +2250,10 @@ static void setup_rcs(struct intel_engine_cs *engine)
                engine->irq_enable_mask = I915_USER_INTERRUPT;
        }
 
-       if (IS_HASWELL(i915))
+       if (IS_HASWELL(i915)) {
                engine->emit_bb_start = hsw_emit_bb_start;
+               engine->flags |= I915_ENGINE_HAS_OA;
+       }
 
        engine->resume = rcs_resume;
 }
diff --git a/drivers/gpu/drm/i915/i915_getparam.c 
b/drivers/gpu/drm/i915/i915_getparam.c
index bd41cc5ce906..39d4c2c2e0f4 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -161,6 +161,10 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
        case I915_PARAM_PERF_REVISION:
                value = i915_perf_ioctl_version();
                break;
+       case I915_PARAM_HAS_EXEC_PERF_CONFIG:
+               /* Obviously requires perf support. */
+               value = i915->perf.initialized;
+               break;
        default:
                DRM_DEBUG("Unknown parameter %d\n", param->param);
                return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index dc3f3170764b..b9e76378b0c2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2666,8 +2666,6 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
        free_oa_buffer(stream);
 
 err_oa_buf_alloc:
-       i915_oa_config_put(stream->oa_config);
-
        intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
        intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index e98c9a7baa91..3166c9ca85f3 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait {
  */
 #define I915_PARAM_PERF_REVISION       55
 
+/*
+ * Request an i915/perf performance configuration change before running the
+ * commands given in an execbuf.
+ *
+ * Performance configuration ID and the file descriptor of the i915 perf
+ * stream are given through drm_i915_gem_execbuffer_ext_perf. See
+ * I915_EXEC_EXT.
+ */
+#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56
+
 /* Must be kept compact -- no holes and well documented */
 
 typedef struct drm_i915_getparam {
@@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext {
         */
        DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 1,
 
+       /**
+        * This identifier is associated with
+        * drm_i915_gem_execbuffer_ext_perf.
+        */
+       DRM_I915_GEM_EXECBUFFER_EXT_PERF,
+
        DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */
 };
 
@@ -1056,6 +1072,29 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences {
        __u64 values_ptr;
 };
 
+struct drm_i915_gem_execbuffer_ext_perf {
+       struct i915_user_extension base;
+
+       /**
+        * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN.
+        * This is used to identify that the application requesting a HW
+        * performance configuration change actually has a right to do so
+        * because it also has access to the i915-perf stream.
+        */
+       __s32 perf_fd;
+
+       /**
+        * Unused for now. Must be cleared to zero.
+        */
+       __u32 pad;
+
+       /**
+        * OA configuration ID to switch to before executing the commands
+        * associated to the execbuf.
+        */
+       __u64 oa_config;
+};
+
 struct drm_i915_gem_execbuffer2 {
        /**
         * List of gem_exec_object2 structs
-- 
2.23.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to