We want the ability to dispatch a set of command buffer to the hardware, each with a different OA configuration. To achieve this, we reuse a couple of fields from the execbuf2 struct (I CAN HAZ execbuf3?) to notify what OA configuration should be used for a batch buffer. This requires the process making the execbuf with this flag to also own the perf fd at the time of execbuf.
v2: Add a emit_oa_config() vfunc in the intel_engine_cs (Chris) Move oa_config vma to active (Chris) v3: Don't drop the lock for engine lookup (Chris) Move OA config vma to active before writing the ringbuffer (Chris) v4: Reuse i915_user_extension_fn Serialize requests with OA config updates v5: Check that the chained extension is only present once (Chris) Unpin oa_vma in main path (Chris) v6: Use BIT_ULL (Chris) v7: Hold drm.struct_mutex when serializing the request with OA config (Chris) Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com> --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 126 +++++++++++++++++- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 + drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++ drivers/gpu/drm/i915/gt/intel_lrc.c | 1 + drivers/gpu/drm/i915/gt/intel_ringbuffer.c | 4 +- drivers/gpu/drm/i915/i915_drv.c | 4 + drivers/gpu/drm/i915/i915_drv.h | 8 +- drivers/gpu/drm/i915/i915_perf.c | 25 ++-- include/uapi/drm/i915_drm.h | 37 +++++ 9 files changed, 199 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index fc5108b8f8ef..2f72b9fa04a6 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -283,7 +283,12 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; + struct drm_i915_gem_execbuffer_ext_perf perf_config; } extensions; + + struct i915_oa_config *oa_config; /** HW configuration for OA, NULL is not needed. */ + struct drm_i915_gem_object *oa_bo; + struct i915_vma *oa_vma; }; #define exec_entry(EB, VMA) (&(EB)->exec[(VMA)->exec_flags - (EB)->flags]) @@ -1210,6 +1215,21 @@ static int reloc_move_to_gpu(struct i915_request *rq, struct i915_vma *vma) return err; } + +static int +get_execbuf_oa_config(struct i915_execbuffer *eb) +{ + eb->oa_config = NULL; + eb->oa_vma = NULL; + eb->oa_bo = NULL; + + if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) == 0) + return 0; + + return i915_perf_get_oa_config(eb->i915, eb->extensions.perf_config.oa_config, + &eb->oa_config, &eb->oa_bo); +} + static int __reloc_gpu_alloc(struct i915_execbuffer *eb, struct i915_vma *vma, unsigned int len) @@ -2072,6 +2092,42 @@ add_to_client(struct i915_request *rq, struct drm_file *file) list_add_tail(&rq->client_link, &rq->file_priv->mm.request_list); } +static int eb_oa_config(struct i915_execbuffer *eb) +{ + int ret; + + if (!eb->oa_config) + return 0; + + lockdep_assert_held(&eb->i915->drm.struct_mutex); /* oa_config */ + + ret = i915_active_request_set(&eb->engine->last_oa_config, + eb->request); + if (ret) + return ret; + + /* + * If the config hasn't changed, skip reconfiguring the HW (this is + * subject to a delay we want to avoid has much as possible). + */ + if (eb->oa_config == eb->i915->perf.oa.exclusive_stream->oa_config) + return 0; + + ret = i915_vma_move_to_active(eb->oa_vma, eb->request, 0); + if (ret) + return ret; + + ret = eb->engine->emit_bb_start(eb->request, + eb->oa_vma->node.start, + 0, I915_DISPATCH_SECURE); + if (ret) + return ret; + + swap(eb->oa_config, eb->i915->perf.oa.exclusive_stream->oa_config); + + return 0; +} + static int eb_submit(struct i915_execbuffer *eb) { int err; @@ -2098,6 +2154,10 @@ static int eb_submit(struct i915_execbuffer *eb) return err; } + err = eb_oa_config(eb); + if (err) + return err; + err = eb->engine->emit_bb_start(eb->request, eb->batch->node.start + eb->batch_start_offset, @@ -2537,8 +2597,25 @@ static int parse_timeline_fences(struct i915_user_extension __user *ext, void *d return 0; } +static int parse_perf_config(struct i915_user_extension __user *ext, void *data) +{ + struct i915_execbuffer *eb = data; + + if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) + return -EINVAL; + + if (copy_from_user(&eb->extensions.perf_config, ext, + sizeof(eb->extensions.perf_config))) + return -EFAULT; + + eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF); + + return 0; +} + static const i915_user_extension_fn execbuf_extensions[] = { [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences, + [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config, }; static int @@ -2643,9 +2720,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + err = get_execbuf_oa_config(&eb); + if (err) + goto err_oa_config; + err = eb_create(&eb); if (err) - goto err_out_fence; + goto err_oa_config; GEM_BUG_ON(!eb.lut_size); @@ -2670,6 +2751,27 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (unlikely(err)) goto err_unlock; + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { + struct file *perf_file; + + if (!intel_engine_has_oa(eb.engine)) { + err = -ENODEV; + goto err_engine; + } + + perf_file = fget(eb.extensions.perf_config.perf_fd); + if (!perf_file) + goto err_engine; + + if (perf_file->private_data != eb.i915->perf.oa.exclusive_stream) + err = -EINVAL; + + fput(perf_file); + + if (unlikely(err)) + goto err_engine; + } + err = eb_wait_for_ring(&eb); /* may temporarily drop struct_mutex */ if (unlikely(err)) goto err_engine; @@ -2790,6 +2892,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, } } + if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) { + eb.oa_vma = i915_vma_instance(eb.oa_bo, + &eb.engine->i915->ggtt.vm, NULL); + if (unlikely(IS_ERR(eb.oa_vma))) { + err = PTR_ERR(eb.oa_vma); + eb.oa_vma = NULL; + goto err_request; + } + + err = i915_vma_pin(eb.oa_vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_request; + } + /* * Whilst this request exists, batch_obj will be on the * active_list, and so will hold the active reference. Only when this @@ -2834,7 +2950,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, i915_gem_context_put(eb.gem_context); err_destroy: eb_destroy(&eb); -err_out_fence: +err_oa_config: + if (eb.oa_config) { + i915_gem_object_put(eb.oa_bo); + i915_oa_config_put(eb.oa_config); + } + if (eb.oa_vma) + i915_vma_unpin(eb.oa_vma); if (out_fence_fd != -1) put_unused_fd(out_fence_fd); err_exec_fence: diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index bdf279fa3b2e..ff51af2db2e7 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -858,6 +858,8 @@ int intel_engine_init_common(struct intel_engine_cs *engine) engine->set_default_submission(engine); + INIT_ACTIVE_REQUEST(&engine->last_oa_config); + return 0; err_unpin: diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index 7e056114344e..39da40937e7f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -363,6 +363,8 @@ struct intel_engine_cs { struct i915_wa_list wa_list; struct i915_wa_list whitelist; + struct i915_active_request last_oa_config; + u32 irq_keep_mask; /* always keep these interrupts */ u32 irq_enable_mask; /* bitmask to enable ring interrupt */ void (*irq_enable)(struct intel_engine_cs *engine); @@ -446,6 +448,7 @@ struct intel_engine_cs { #define I915_ENGINE_HAS_SEMAPHORES BIT(3) #define I915_ENGINE_NEEDS_BREADCRUMB_TASKLET BIT(4) #define I915_ENGINE_IS_VIRTUAL BIT(5) +#define I915_ENGINE_HAS_OA BIT(6) unsigned int flags; /* @@ -541,6 +544,12 @@ intel_engine_is_virtual(const struct intel_engine_cs *engine) return engine->flags & I915_ENGINE_IS_VIRTUAL; } +static inline bool +intel_engine_has_oa(const struct intel_engine_cs *engine) +{ + return engine->flags & I915_ENGINE_HAS_OA; +} + #define instdone_slice_mask(dev_priv__) \ (IS_GEN(dev_priv__, 7) ? \ 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 270ef417dd1a..71ce8d9275fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -2816,6 +2816,7 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine) engine->init_context = gen8_init_rcs_context; engine->emit_flush = gen8_emit_flush_render; engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + engine->flags |= I915_ENGINE_HAS_OA; } return 0; diff --git a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c index a98652e4055c..0d1334ab447b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/gt/intel_ringbuffer.c @@ -2205,8 +2205,10 @@ static void setup_rcs(struct intel_engine_cs *engine) engine->irq_enable_mask = I915_USER_INTERRUPT; } - if (IS_HASWELL(i915)) + if (IS_HASWELL(i915)) { engine->emit_bb_start = hsw_emit_bb_start; + engine->flags |= I915_ENGINE_HAS_OA; + } engine->resume = rcs_resume; } diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 94062188c523..f767753ccf51 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -487,6 +487,10 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data, case I915_PARAM_PERF_REVISION: value = i915_perf_ioctl_version(); break; + case I915_PARAM_HAS_EXEC_PERF_CONFIG: + /* Obviously requires perf support. */ + value = dev_priv->perf.initialized; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index b3c6dd72c7a1..e898a3593b8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1116,7 +1116,7 @@ struct i915_oa_config { struct list_head vma_link; - atomic_t ref_count; + struct kref ref; }; struct i915_perf_stream; @@ -2614,6 +2614,12 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, int metrics_set, struct i915_oa_config **out_config, struct drm_i915_gem_object **out_obj); +void i915_oa_config_release(struct kref *ref); + +static inline void i915_oa_config_put(struct i915_oa_config *oa_config) +{ + kref_put(&oa_config->ref, i915_oa_config_release); +} /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 7ae2011cc98a..5ed7771d3cba 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -367,10 +367,9 @@ struct perf_open_properties { int oa_period_exponent; }; -static void put_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + struct i915_oa_config *oa_config = container_of(ref, typeof(*oa_config), ref); if (oa_config->obj) { struct drm_i915_private *i915 = oa_config->i915; @@ -488,7 +487,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, } if (out_config) { - atomic_inc(&oa_config->ref_count); + kref_get(&oa_config->ref); *out_config = oa_config; } @@ -510,7 +509,7 @@ int i915_perf_get_oa_config(struct drm_i915_private *i915, mutex_unlock(&i915->perf.metrics_lock); if (ret && out_config) { - put_oa_config(oa_config); + i915_oa_config_put(oa_config); *out_config = NULL; } @@ -1484,7 +1483,7 @@ static void i915_oa_stream_destroy(struct i915_perf_stream *stream) if (stream->ctx) oa_put_render_ctx_id(stream); - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); if (dev_priv->perf.oa.spurious_report_rs.missed) { DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", @@ -2460,7 +2459,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, free_oa_buffer(dev_priv); err_oa_buf_alloc: - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); @@ -2470,7 +2469,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream, mutex_unlock(&dev_priv->drm.struct_mutex); err_noa_wait_alloc: - put_oa_config(stream->oa_config); + i915_oa_config_put(stream->oa_config); err_config: if (stream->ctx) @@ -3285,7 +3284,7 @@ void i915_perf_register(struct drm_i915_private *dev_priv) goto sysfs_error; dev_priv->perf.oa.test_config.i915 = dev_priv; - atomic_set(&dev_priv->perf.oa.test_config.ref_count, 1); + kref_init(&dev_priv->perf.oa.test_config.ref); goto exit; @@ -3542,7 +3541,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, } oa_config->i915 = dev_priv; - atomic_set(&oa_config->ref_count, 1); + kref_init(&oa_config->ref); if (!uuid_is_valid(args->uuid)) { DRM_DEBUG("Invalid uuid format for OA config\n"); @@ -3641,7 +3640,7 @@ int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, sysfs_err: mutex_unlock(&dev_priv->perf.metrics_lock); reg_err: - put_oa_config(oa_config); + i915_oa_config_put(oa_config); DRM_DEBUG("Failed to add new OA config\n"); return err; } @@ -3697,7 +3696,7 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id); - put_oa_config(oa_config); + i915_oa_config_put(oa_config); return 0; @@ -3867,7 +3866,7 @@ static int destroy_config(int id, void *p, void *data) { struct i915_oa_config *oa_config = p; - put_oa_config(oa_config); + i915_oa_config_put(oa_config); return 0; } diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index e5ac1174c01e..e6f7f738490e 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -624,6 +624,16 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* + * Request an i915/perf performance configuration change before running the + * commands given in an execbuf. + * + * Performance configuration ID and the file descriptor of the i915 perf + * stream are given through drm_i915_gem_execbuffer_ext_perf. See + * I915_EXEC_EXT. + */ +#define I915_PARAM_HAS_EXEC_PERF_CONFIG 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1026,6 +1036,12 @@ enum drm_i915_gem_execbuffer_ext { */ DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES = 0, + /** + * This identifier is associated with + * drm_i915_gem_execbuffer_perf_ext. + */ + DRM_I915_GEM_EXECBUFFER_EXT_PERF, + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ }; @@ -1056,6 +1072,27 @@ struct drm_i915_gem_execbuffer_ext_timeline_fences { __u64 values_ptr; }; +struct drm_i915_gem_execbuffer_ext_perf { + struct i915_user_extension base; + + /** + * Performance file descriptor returned by DRM_IOCTL_I915_PERF_OPEN. + * This is used to identify that the application + */ + __s32 perf_fd; + + /** + * Unused for now. Must be cleared to zero. + */ + __u32 pad; + + /** + * OA configuration ID to switch to before executing the commands + * associated to the execbuf. + */ + __u64 oa_config; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs -- 2.22.0 _______________________________________________ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx