[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily
From: Lionel Landwerlin Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configuration without having to open/close the i915/perf stream. v2: No need for locking on object OA config object creation (Chris) Flush cpu mapping of OA config (Chris) v3: Properly deal with the perf_metric lock (Chris/Lionel) v4: Fix oa config unref/put when not found (Lionel) v5: Allocate BOs for configurations on the stream instead of globally (Lionel) v6: Fix 64bit division (Chris) v7: Store allocated config BOs into the stream (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson (v4) Signed-off-by: Chris Wilson (v4) --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 107 +++ drivers/gpu/drm/i915/i915_perf.h | 24 + drivers/gpu/drm/i915/i915_perf_types.h | 23 ++-- 4 files changed, 102 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index b0227ab2fe1b..0987100c786b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,6 +138,7 @@ /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEMMI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT(1<<22) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b51ab3ab523..5fa0df46fcc3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -369,52 +369,52 @@ struct perf_open_properties { struct intel_engine_cs *engine; }; +struct i915_oa_config_bo { + struct llist_node node; + + struct i915_oa_config *oa_config; + struct i915_vma *vma; +}; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) kfree(oa_config->b_counter_regs); if (!PTR_ERR(oa_config->mux_regs)) kfree(oa_config->mux_regs); - kfree(oa_config); -} - -static void put_oa_config(struct i915_oa_config *oa_config) -{ - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; - free_oa_config(oa_config); + kfree_rcu(oa_config, rcu); } -static int get_oa_config(struct i915_perf *perf, -int metrics_set, -struct i915_oa_config **out_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - int ret; - - if (metrics_set == 1) { - *out_config = &perf->test_config; - atomic_inc(&perf->test_config.ref_count); - return 0; - } - - ret = mutex_lock_interruptible(&perf->metrics_lock); - if (ret) - return ret; + struct i915_oa_config *oa_config; - *out_config = idr_find(&perf->metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; else - atomic_inc(&(*out_config)->ref_count); + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - mutex_unlock(&perf->metrics_lock); + return oa_config; +} - return ret; +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) @@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; } +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.f
[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily
From: Lionel Landwerlin Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configuration without having to open/close the i915/perf stream. v2: No need for locking on object OA config object creation (Chris) Flush cpu mapping of OA config (Chris) v3: Properly deal with the perf_metric lock (Chris/Lionel) v4: Fix oa config unref/put when not found (Lionel) v5: Allocate BOs for configurations on the stream instead of globally (Lionel) v6: Fix 64bit division (Chris) v7: Store allocated config BOs into the stream (Lionel) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson (v4) Signed-off-by: Chris Wilson (v4) --- drivers/gpu/drm/i915/gt/intel_gpu_commands.h | 1 + drivers/gpu/drm/i915/i915_perf.c | 107 +++ drivers/gpu/drm/i915/i915_perf.h | 24 + drivers/gpu/drm/i915/i915_perf_types.h | 23 ++-- 4 files changed, 102 insertions(+), 53 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h index b0227ab2fe1b..0987100c786b 100644 --- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h +++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h @@ -138,6 +138,7 @@ /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */ #define MI_LRI_CS_MMIO (1<<19) #define MI_LRI_FORCE_POSTED (1<<12) +#define MI_LOAD_REGISTER_IMM_MAX_REGS (126) #define MI_STORE_REGISTER_MEMMI_INSTR(0x24, 1) #define MI_STORE_REGISTER_MEM_GEN8 MI_INSTR(0x24, 2) #define MI_SRM_LRM_GLOBAL_GTT(1<<22) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b51ab3ab523..5fa0df46fcc3 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -369,52 +369,52 @@ struct perf_open_properties { struct intel_engine_cs *engine; }; +struct i915_oa_config_bo { + struct llist_node node; + + struct i915_oa_config *oa_config; + struct i915_vma *vma; +}; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); -static void free_oa_config(struct i915_oa_config *oa_config) +void i915_oa_config_release(struct kref *ref) { + struct i915_oa_config *oa_config = + container_of(ref, typeof(*oa_config), ref); + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) kfree(oa_config->b_counter_regs); if (!PTR_ERR(oa_config->mux_regs)) kfree(oa_config->mux_regs); - kfree(oa_config); -} - -static void put_oa_config(struct i915_oa_config *oa_config) -{ - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; - free_oa_config(oa_config); + kfree_rcu(oa_config, rcu); } -static int get_oa_config(struct i915_perf *perf, -int metrics_set, -struct i915_oa_config **out_config) +struct i915_oa_config * +i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set) { - int ret; - - if (metrics_set == 1) { - *out_config = &perf->test_config; - atomic_inc(&perf->test_config.ref_count); - return 0; - } - - ret = mutex_lock_interruptible(&perf->metrics_lock); - if (ret) - return ret; + struct i915_oa_config *oa_config; - *out_config = idr_find(&perf->metrics_idr, metrics_set); - if (!*out_config) - ret = -EINVAL; + rcu_read_lock(); + if (metrics_set == 1) + oa_config = &perf->test_config; else - atomic_inc(&(*out_config)->ref_count); + oa_config = idr_find(&perf->metrics_idr, metrics_set); + if (oa_config) + oa_config = i915_oa_config_get(oa_config); + rcu_read_unlock(); - mutex_unlock(&perf->metrics_lock); + return oa_config; +} - return ret; +static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) +{ + i915_oa_config_put(oa_bo->oa_config); + i915_vma_put(oa_bo->vma); + kfree(oa_bo); } static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) @@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream) stream->oa_buffer.vaddr = NULL; } +static void +free_oa_configs(struct i915_perf_stream *stream) +{ + struct i915_oa_config_bo *oa_bo, *tmp; + + i915_oa_config_put(stream->oa_config); + llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.f