Re: [Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily

2018-10-08 Thread Lionel Landwerlin

On 08/10/2018 16:34, Chris Wilson wrote:

Quoting Lionel Landwerlin (2018-10-08 16:18:21)

Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configuration without having to open/close the i915/perf
stream.

Signed-off-by: Lionel Landwerlin 
---
  drivers/gpu/drm/i915/i915_drv.h   |  22 ++-
  drivers/gpu/drm/i915/i915_perf.c  | 195 ++
  drivers/gpu/drm/i915/intel_gpu_commands.h |   1 +
  3 files changed, 187 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2264b30ce51a..a35715cd7608 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1378,6 +1378,10 @@ struct i915_oa_config {
 struct attribute *attrs[2];
 struct device_attribute sysfs_metric_id;
  
+   struct i915_vma *vma;

+
+   struct list_head vma_link;
+
 atomic_t ref_count;
  };
  
@@ -1979,11 +1983,21 @@ struct drm_i915_private {

 struct mutex metrics_lock;
  
 /*

-* List of dynamic configurations, you need to hold
-* dev_priv->perf.metrics_lock to access it.
+* List of dynamic configurations (struct i915_oa_config), you
+* need to hold dev_priv->perf.metrics_lock to access it.
  */
 struct idr metrics_idr;
  
+   /*

+* List of dynamic configurations (struct i915_oa_config)
+* which have an allocated buffer in GGTT for reconfiguration,
+* you need to hold dev_priv->perf.metrics_lock to access it.
+* Elements are added to the list lazilly on execbuf (when a
+* particular configuration is requested). The list is freed
+* upon closing the perf stream.
+*/
+   struct list_head metrics_buffers;
+
 /*
  * Lock associated with anything below within this structure
  * except exclusive_stream.
@@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device 
*dev, void *data,
  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
 struct i915_gem_context *ctx,
 uint32_t *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+   int metrics_set,
+   struct i915_oa_config **out_config,
+   struct i915_vma **out_vma);
  
  /* i915_gem_evict.c */

  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e2a96b6844fe..39c5b44862d4 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -364,9 +364,16 @@ struct perf_open_properties {
 int oa_period_exponent;
  };
  
-static void free_oa_config(struct drm_i915_private *dev_priv,

-  struct i915_oa_config *oa_config)
+static void put_oa_config(struct i915_oa_config *oa_config)
  {
+   if (!atomic_dec_and_test(_config->ref_count))
+   return;
+
+   if (oa_config->vma) {
+   list_del(_config->vma_link);
+   i915_vma_put(oa_config->vma);
+   }
+
 if (!PTR_ERR(oa_config->flex_regs))
 kfree(oa_config->flex_regs);
 if (!PTR_ERR(oa_config->b_counter_regs))
@@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private 
*dev_priv,
 kfree(oa_config);
  }
  
-static void put_oa_config(struct drm_i915_private *dev_priv,

- struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 
n_regs)
  {
-   if (!atomic_dec_and_test(_config->ref_count))
-   return;
+   u32 i;
  
-   free_oa_config(dev_priv, oa_config);

+   for (i = 0; i < n_regs; i++) {
+   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+   u32 n_lri = min(n_regs - i,
+   (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+   }
+   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+   *cs++ = reg_data[i].value;
+   }
+
+   return cs;
  }
  
-static int get_oa_config(struct drm_i915_private *dev_priv,

-int metrics_set,
-struct i915_oa_config **out_config)
+static int 

Re: [Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily

2018-10-08 Thread Chris Wilson
Quoting Lionel Landwerlin (2018-10-08 16:18:21)
> Here we introduce a mechanism by which the execbuf part of the i915
> driver will be able to request that a batch buffer containing the
> programming for a particular OA config be created.
> 
> We'll execute these OA configuration buffers right before executing a
> set of userspace commands so that a particular user batchbuffer be
> executed with a given OA configuration.
> 
> This mechanism essentially allows the userspace driver to go through
> several OA configuration without having to open/close the i915/perf
> stream.
> 
> Signed-off-by: Lionel Landwerlin 
> ---
>  drivers/gpu/drm/i915/i915_drv.h   |  22 ++-
>  drivers/gpu/drm/i915/i915_perf.c  | 195 ++
>  drivers/gpu/drm/i915/intel_gpu_commands.h |   1 +
>  3 files changed, 187 insertions(+), 31 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2264b30ce51a..a35715cd7608 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1378,6 +1378,10 @@ struct i915_oa_config {
> struct attribute *attrs[2];
> struct device_attribute sysfs_metric_id;
>  
> +   struct i915_vma *vma;
> +
> +   struct list_head vma_link;
> +
> atomic_t ref_count;
>  };
>  
> @@ -1979,11 +1983,21 @@ struct drm_i915_private {
> struct mutex metrics_lock;
>  
> /*
> -* List of dynamic configurations, you need to hold
> -* dev_priv->perf.metrics_lock to access it.
> +* List of dynamic configurations (struct i915_oa_config), you
> +* need to hold dev_priv->perf.metrics_lock to access it.
>  */
> struct idr metrics_idr;
>  
> +   /*
> +* List of dynamic configurations (struct i915_oa_config)
> +* which have an allocated buffer in GGTT for reconfiguration,
> +* you need to hold dev_priv->perf.metrics_lock to access it.
> +* Elements are added to the list lazilly on execbuf (when a
> +* particular configuration is requested). The list is freed
> +* upon closing the perf stream.
> +*/
> +   struct list_head metrics_buffers;
> +
> /*
>  * Lock associated with anything below within this structure
>  * except exclusive_stream.
> @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device 
> *dev, void *data,
>  void i915_oa_init_reg_state(struct intel_engine_cs *engine,
> struct i915_gem_context *ctx,
> uint32_t *reg_state);
> +int i915_perf_get_oa_config(struct drm_i915_private *i915,
> +   int metrics_set,
> +   struct i915_oa_config **out_config,
> +   struct i915_vma **out_vma);
>  
>  /* i915_gem_evict.c */
>  int __must_check i915_gem_evict_something(struct i915_address_space *vm,
> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
> b/drivers/gpu/drm/i915/i915_perf.c
> index e2a96b6844fe..39c5b44862d4 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -364,9 +364,16 @@ struct perf_open_properties {
> int oa_period_exponent;
>  };
>  
> -static void free_oa_config(struct drm_i915_private *dev_priv,
> -  struct i915_oa_config *oa_config)
> +static void put_oa_config(struct i915_oa_config *oa_config)
>  {
> +   if (!atomic_dec_and_test(_config->ref_count))
> +   return;
> +
> +   if (oa_config->vma) {
> +   list_del(_config->vma_link);
> +   i915_vma_put(oa_config->vma);
> +   }
> +
> if (!PTR_ERR(oa_config->flex_regs))
> kfree(oa_config->flex_regs);
> if (!PTR_ERR(oa_config->b_counter_regs))
> @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private 
> *dev_priv,
> kfree(oa_config);
>  }
>  
> -static void put_oa_config(struct drm_i915_private *dev_priv,
> - struct i915_oa_config *oa_config)
> +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 
> n_regs)
>  {
> -   if (!atomic_dec_and_test(_config->ref_count))
> -   return;
> +   u32 i;
>  
> -   free_oa_config(dev_priv, oa_config);
> +   for (i = 0; i < n_regs; i++) {
> +   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
> +   u32 n_lri = min(n_regs - i,
> +   (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
> +
> +   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
> +   }
> +   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
> +   *cs++ = reg_data[i].value;
> +   }
> +
> +   return cs;
>  }
>  
> -static int 

[Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily

2018-10-08 Thread Lionel Landwerlin
Here we introduce a mechanism by which the execbuf part of the i915
driver will be able to request that a batch buffer containing the
programming for a particular OA config be created.

We'll execute these OA configuration buffers right before executing a
set of userspace commands so that a particular user batchbuffer be
executed with a given OA configuration.

This mechanism essentially allows the userspace driver to go through
several OA configuration without having to open/close the i915/perf
stream.

Signed-off-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_drv.h   |  22 ++-
 drivers/gpu/drm/i915/i915_perf.c  | 195 ++
 drivers/gpu/drm/i915/intel_gpu_commands.h |   1 +
 3 files changed, 187 insertions(+), 31 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 2264b30ce51a..a35715cd7608 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1378,6 +1378,10 @@ struct i915_oa_config {
struct attribute *attrs[2];
struct device_attribute sysfs_metric_id;
 
+   struct i915_vma *vma;
+
+   struct list_head vma_link;
+
atomic_t ref_count;
 };
 
@@ -1979,11 +1983,21 @@ struct drm_i915_private {
struct mutex metrics_lock;
 
/*
-* List of dynamic configurations, you need to hold
-* dev_priv->perf.metrics_lock to access it.
+* List of dynamic configurations (struct i915_oa_config), you
+* need to hold dev_priv->perf.metrics_lock to access it.
 */
struct idr metrics_idr;
 
+   /*
+* List of dynamic configurations (struct i915_oa_config)
+* which have an allocated buffer in GGTT for reconfiguration,
+* you need to hold dev_priv->perf.metrics_lock to access it.
+* Elements are added to the list lazilly on execbuf (when a
+* particular configuration is requested). The list is freed
+* upon closing the perf stream.
+*/
+   struct list_head metrics_buffers;
+
/*
 * Lock associated with anything below within this structure
 * except exclusive_stream.
@@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device 
*dev, void *data,
 void i915_oa_init_reg_state(struct intel_engine_cs *engine,
struct i915_gem_context *ctx,
uint32_t *reg_state);
+int i915_perf_get_oa_config(struct drm_i915_private *i915,
+   int metrics_set,
+   struct i915_oa_config **out_config,
+   struct i915_vma **out_vma);
 
 /* i915_gem_evict.c */
 int __must_check i915_gem_evict_something(struct i915_address_space *vm,
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index e2a96b6844fe..39c5b44862d4 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -364,9 +364,16 @@ struct perf_open_properties {
int oa_period_exponent;
 };
 
-static void free_oa_config(struct drm_i915_private *dev_priv,
-  struct i915_oa_config *oa_config)
+static void put_oa_config(struct i915_oa_config *oa_config)
 {
+   if (!atomic_dec_and_test(_config->ref_count))
+   return;
+
+   if (oa_config->vma) {
+   list_del(_config->vma_link);
+   i915_vma_put(oa_config->vma);
+   }
+
if (!PTR_ERR(oa_config->flex_regs))
kfree(oa_config->flex_regs);
if (!PTR_ERR(oa_config->b_counter_regs))
@@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private 
*dev_priv,
kfree(oa_config);
 }
 
-static void put_oa_config(struct drm_i915_private *dev_priv,
- struct i915_oa_config *oa_config)
+static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 
n_regs)
 {
-   if (!atomic_dec_and_test(_config->ref_count))
-   return;
+   u32 i;
 
-   free_oa_config(dev_priv, oa_config);
+   for (i = 0; i < n_regs; i++) {
+   if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) {
+   u32 n_lri = min(n_regs - i,
+   (u32) MI_LOAD_REGISTER_IMM_MAX_REGS);
+
+   *cs++ = MI_LOAD_REGISTER_IMM(n_lri);
+   }
+   *cs++ = i915_mmio_reg_offset(reg_data[i].addr);
+   *cs++ = reg_data[i].value;
+   }
+
+   return cs;
 }
 
-static int get_oa_config(struct drm_i915_private *dev_priv,
-int metrics_set,
-struct i915_oa_config **out_config)
+static int alloc_oa_config_buffer(struct drm_i915_private *i915,
+ struct i915_oa_config *oa_config)
 {
+   struct