[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily

2019-10-11 Thread Chris Wilson
From: Lionel Landwerlin 

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands, so that a particular user batch buffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush CPU mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 107 +++
 drivers/gpu/drm/i915/i915_perf.h |  24 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO   (1<<19)
 #define   MI_LRI_FORCE_POSTED  (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT      (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b51ab3ab523..5fa0df46fcc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+   struct llist_node node;
+
+   struct i915_oa_config *oa_config;
+   struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+   struct i915_oa_config *oa_config =
+   container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
-   kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-   if (!atomic_dec_and_test(&oa_config->ref_count))
-   return;
 
-   free_oa_config(oa_config);
+   kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-int metrics_set,
-struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-   int ret;
-
-   if (metrics_set == 1) {
-   *out_config = &perf->test_config;
-   atomic_inc(&perf->test_config.ref_count);
-   return 0;
-   }
-
-   ret = mutex_lock_interruptible(&perf->metrics_lock);
-   if (ret)
-   return ret;
+   struct i915_oa_config *oa_config;
 
-   *out_config = idr_find(&perf->metrics_idr, metrics_set);
-   if (!*out_config)
-   ret = -EINVAL;
+   rcu_read_lock();
+   if (metrics_set == 1)
+   oa_config = &perf->test_config;
else
-   atomic_inc(&(*out_config)->ref_count);
+   oa_config = idr_find(&perf->metrics_idr, metrics_set);
+   if (oa_config)
+   oa_config = i915_oa_config_get(oa_config);
+   rcu_read_unlock();
 
-   mutex_unlock(&perf->metrics_lock);
+   return oa_config;
+}
 
-   return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+   i915_oa_config_put(oa_bo->oa_config);
+   i915_vma_put(oa_bo->vma);
+   kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+   struct i915_oa_config_bo *oa_bo, *tmp;
+
+   i915_oa_config_put(stream->oa_config);
+   llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.f

[Intel-gfx] [CI 3/9] drm/i915/perf: allow for CS OA configs to be created lazily

2019-10-10 Thread Chris Wilson
From: Lionel Landwerlin 

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands, so that a particular user batch buffer is
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configurations without having to open/close the i915/perf
stream.

v2: No need for locking on OA config object creation (Chris)
    Flush CPU mapping of OA config (Chris)

v3: Properly deal with the perf_metric lock (Chris/Lionel)

v4: Fix oa config unref/put when not found (Lionel)

v5: Allocate BOs for configurations on the stream instead of globally
(Lionel)

v6: Fix 64bit division (Chris)

v7: Store allocated config BOs into the stream (Lionel)

Signed-off-by: Lionel Landwerlin 
Reviewed-by: Chris Wilson  (v4)
Signed-off-by: Chris Wilson  (v4)
---
 drivers/gpu/drm/i915/gt/intel_gpu_commands.h |   1 +
 drivers/gpu/drm/i915/i915_perf.c | 107 +++
 drivers/gpu/drm/i915/i915_perf.h |  24 +
 drivers/gpu/drm/i915/i915_perf_types.h   |  23 ++--
 4 files changed, 102 insertions(+), 53 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index b0227ab2fe1b..0987100c786b 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -138,6 +138,7 @@
 /* Gen11+. addr = base + (ctx_restore ? offset & GENMASK(12,2) : offset) */
 #define   MI_LRI_CS_MMIO   (1<<19)
 #define   MI_LRI_FORCE_POSTED  (1<<12)
+#define MI_LOAD_REGISTER_IMM_MAX_REGS (126)
 #define MI_STORE_REGISTER_MEM        MI_INSTR(0x24, 1)
 #define MI_STORE_REGISTER_MEM_GEN8   MI_INSTR(0x24, 2)
 #define   MI_SRM_LRM_GLOBAL_GTT      (1<<22)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 0b51ab3ab523..5fa0df46fcc3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -369,52 +369,52 @@ struct perf_open_properties {
struct intel_engine_cs *engine;
 };
 
+struct i915_oa_config_bo {
+   struct llist_node node;
+
+   struct i915_oa_config *oa_config;
+   struct i915_vma *vma;
+};
+
 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
 
-static void free_oa_config(struct i915_oa_config *oa_config)
+void i915_oa_config_release(struct kref *ref)
 {
+   struct i915_oa_config *oa_config =
+   container_of(ref, typeof(*oa_config), ref);
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
kfree(oa_config->b_counter_regs);
if (!PTR_ERR(oa_config->mux_regs))
kfree(oa_config->mux_regs);
-   kfree(oa_config);
-}
-
-static void put_oa_config(struct i915_oa_config *oa_config)
-{
-   if (!atomic_dec_and_test(&oa_config->ref_count))
-   return;
 
-   free_oa_config(oa_config);
+   kfree_rcu(oa_config, rcu);
 }
 
-static int get_oa_config(struct i915_perf *perf,
-int metrics_set,
-struct i915_oa_config **out_config)
+struct i915_oa_config *
+i915_perf_get_oa_config(struct i915_perf *perf, int metrics_set)
 {
-   int ret;
-
-   if (metrics_set == 1) {
-   *out_config = &perf->test_config;
-   atomic_inc(&perf->test_config.ref_count);
-   return 0;
-   }
-
-   ret = mutex_lock_interruptible(&perf->metrics_lock);
-   if (ret)
-   return ret;
+   struct i915_oa_config *oa_config;
 
-   *out_config = idr_find(&perf->metrics_idr, metrics_set);
-   if (!*out_config)
-   ret = -EINVAL;
+   rcu_read_lock();
+   if (metrics_set == 1)
+   oa_config = &perf->test_config;
else
-   atomic_inc(&(*out_config)->ref_count);
+   oa_config = idr_find(&perf->metrics_idr, metrics_set);
+   if (oa_config)
+   oa_config = i915_oa_config_get(oa_config);
+   rcu_read_unlock();
 
-   mutex_unlock(&perf->metrics_lock);
+   return oa_config;
+}
 
-   return ret;
+static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo)
+{
+   i915_oa_config_put(oa_bo->oa_config);
+   i915_vma_put(oa_bo->vma);
+   kfree(oa_bo);
 }
 
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
@@ -1337,6 +1337,16 @@ free_oa_buffer(struct i915_perf_stream *stream)
stream->oa_buffer.vaddr = NULL;
 }
 
+static void
+free_oa_configs(struct i915_perf_stream *stream)
+{
+   struct i915_oa_config_bo *oa_bo, *tmp;
+
+   i915_oa_config_put(stream->oa_config);
+   llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.f