[Intel-gfx] [CI 11/13] drm/i915: add a new perf configuration execbuf parameter
We want the ability to dispatch a set of command buffer to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) v3: Don't drop the lock for engine lookup (Chris) Move OA config vma to active before writing the ringbuffer (Chris) v4: Reuse i915_user_extension_fn Serialize requests with OA config updates v5: Check that the chained extension is only present once (Chris) Unpin oa_vma in main path (Chris) v6: Use BIT_ULL (Chris) v7: Hold drm.struct_mutex when serializing the request with OA config (Chris) v8: Remove active request from engine (Lionel) v9: Move fetching OA configuration pass engine pinning (Lionel) Lock VMA before moving to active (Chris) v10: Fix leak on perf_fd (Lionel) Signed-off-by: Lionel Landwerlin --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 147 +- drivers/gpu/drm/i915/i915_getparam.c | 4 + include/uapi/drm/i915_drm.h | 39 + 3 files changed, 188 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 46ad8d9642d1..d416b60c94bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -24,6 +24,7 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" +#include "i915_perf.h" #include "i915_trace.h" #include "i915_user_extensions.h" @@ -284,7 +285,12 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct drm_i915_gem_execbuffer_ext_perf perf_config; } extensions; + + struct file *perf_file; + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */ + struct i915_vma *oa_vma; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) return err; } + +static int +eb_get_oa_config(struct i915_execbuffer *eb) +{ + struct drm_i915_gem_object *oa_bo; + int err = 0; + + eb->perf_file = NULL; + eb->oa_config = NULL; + eb->oa_vma = NULL; + + if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0) + return 0; + + eb->perf_file = fget(eb->extensions.perf_config.perf_fd); + if (!eb->perf_file) + return -EINVAL; + + err = i915_mutex_lock_interruptible(&eb->i915->drm); + if (err) + return err; + + if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream) + err = -EINVAL; + + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (err) + return err; + + if (eb->i915->perf.exclusive_stream->engine != eb->engine) + return -EINVAL; + + err = i915_perf_get_oa_config_and_bo( + eb->i915->perf.exclusive_stream, + eb->extensions.perf_config.oa_config, + &eb->oa_config, &oa_bo); + if (err) + return err; + + eb->oa_vma = i915_vma_instance(oa_bo, + &eb->engine->gt->ggtt->vm, NULL); + i915_gem_object_put(oa_bo); + if (IS_ERR(eb->oa_vma)) { + err = PTR_ERR(eb->oa_vma); + eb->oa_vma = NULL; + return err; + } + + return 0; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } +static int eb_oa_config(struct i915_execbuffer *eb) +{ + struct i915_perf_stream *perf_stream; + int err; + + if (!eb->oa_config) + return 0; + + perf_stream = eb->perf_file->private_data; + + err = mutex_lock_interruptible(&perf_stream->config_mutex); + if (err) + return err; + + err = i915_active_request_set(&perf_stream->active_config_rq, + eb->request); + if (err) + goto out; + + /* +* If the config hasn't changed, skip reconfiguring the HW (this is +* subject to a delay we want to avoid has much as possible). +*/ + if (eb->oa_config == perf_stream->oa_config) + goto out; + + i
[Intel-gfx] [CI 11/13] drm/i915: add a new perf configuration execbuf parameter
We want the ability to dispatch a set of command buffer to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf. v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) v3: Don't drop the lock for engine lookup (Chris) Move OA config vma to active before writing the ringbuffer (Chris) v4: Reuse i915_user_extension_fn Serialize requests with OA config updates v5: Check that the chained extension is only present once (Chris) Unpin oa_vma in main path (Chris) v6: Use BIT_ULL (Chris) v7: Hold drm.struct_mutex when serializing the request with OA config (Chris) v8: Remove active request from engine (Lionel) v9: Move fetching OA configuration pass engine pinning (Lionel) Lock VMA before moving to active (Chris) v10: Fix leak on perf_fd (Lionel) Signed-off-by: Lionel Landwerlin --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 147 +- drivers/gpu/drm/i915/i915_getparam.c | 4 + include/uapi/drm/i915_drm.h | 39 + 3 files changed, 188 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 46ad8d9642d1..d416b60c94bb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -24,6 +24,7 @@ #include "i915_gem_clflush.h" #include "i915_gem_context.h" #include "i915_gem_ioctls.h" +#include "i915_perf.h" #include "i915_trace.h" #include "i915_user_extensions.h" @@ -284,7 +285,12 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct drm_i915_gem_execbuffer_ext_perf perf_config; } extensions; + + struct file *perf_file; + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */ + struct i915_vma *oa_vma; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1152,6 +1158,58 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) return err; } + +static int +eb_get_oa_config(struct i915_execbuffer *eb) +{ + struct drm_i915_gem_object *oa_bo; + int err = 0; + + eb->perf_file = NULL; + eb->oa_config = NULL; + eb->oa_vma = NULL; + + if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0) + return 0; + + eb->perf_file = fget(eb->extensions.perf_config.perf_fd); + if (!eb->perf_file) + return -EINVAL; + + err = i915_mutex_lock_interruptible(&eb->i915->drm); + if (err) + return err; + + if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream) + err = -EINVAL; + + mutex_unlock(&eb->i915->drm.struct_mutex); + + if (err) + return err; + + if (eb->i915->perf.exclusive_stream->engine != eb->engine) + return -EINVAL; + + err = i915_perf_get_oa_config_and_bo( + eb->i915->perf.exclusive_stream, + eb->extensions.perf_config.oa_config, + &eb->oa_config, &oa_bo); + if (err) + return err; + + eb->oa_vma = i915_vma_instance(oa_bo, + &eb->engine->gt->ggtt->vm, NULL); + i915_gem_object_put(oa_bo); + if (IS_ERR(eb->oa_vma)) { + err = PTR_ERR(eb->oa_vma); + eb->oa_vma = NULL; + return err; + } + + return 0; +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -2051,6 +2109,54 @@ add_to_client(struct i915_request *rq, struct drm_file *file) spin_unlock(&file_priv->mm.lock); } +static int eb_oa_config(struct i915_execbuffer *eb) +{ + struct i915_perf_stream *perf_stream; + int err; + + if (!eb->oa_config) + return 0; + + perf_stream = eb->perf_file->private_data; + + err = mutex_lock_interruptible(&perf_stream->config_mutex); + if (err) + return err; + + err = i915_active_request_set(&perf_stream->active_config_rq, + eb->request); + if (err) + goto out; + + /* +* If the config hasn't changed, skip reconfiguring the HW (this is +* subject to a delay we want to avoid has much as possible). +*/ + if (eb->oa_config == perf_stream->oa_config) + goto out; + + i