Re: [Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily
On 08/10/2018 16:34, Chris Wilson wrote: Quoting Lionel Landwerlin (2018-10-08 16:18:21) Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configuration without having to open/close the i915/perf stream. Signed-off-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_drv.h | 22 ++- drivers/gpu/drm/i915/i915_perf.c | 195 ++ drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + 3 files changed, 187 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2264b30ce51a..a35715cd7608 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1378,6 +1378,10 @@ struct i915_oa_config { struct attribute *attrs[2]; struct device_attribute sysfs_metric_id; + struct i915_vma *vma; + + struct list_head vma_link; + atomic_t ref_count; }; @@ -1979,11 +1983,21 @@ struct drm_i915_private { struct mutex metrics_lock; /* -* List of dynamic configurations, you need to hold -* dev_priv->perf.metrics_lock to access it. +* List of dynamic configurations (struct i915_oa_config), you +* need to hold dev_priv->perf.metrics_lock to access it. */ struct idr metrics_idr; + /* +* List of dynamic configurations (struct i915_oa_config) +* which have an allocated buffer in GGTT for reconfiguration, +* you need to hold dev_priv->perf.metrics_lock to access it. +* Elements are added to the list lazilly on execbuf (when a +* particular configuration is requested). The list is freed +* upon closing the perf stream. +*/ + struct list_head metrics_buffers; + /* * Lock associated with anything below within this structure * except exclusive_stream. @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, uint32_t *reg_state); +int i915_perf_get_oa_config(struct drm_i915_private *i915, + int metrics_set, + struct i915_oa_config **out_config, + struct i915_vma **out_vma); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e2a96b6844fe..39c5b44862d4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -364,9 +364,16 @@ struct perf_open_properties { int oa_period_exponent; }; -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static void put_oa_config(struct i915_oa_config *oa_config) { + if (!atomic_dec_and_test(&oa_config->ref_count)) + return; + + if (oa_config->vma) { + list_del(&oa_config->vma_link); + i915_vma_put(oa_config->vma); + } + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv, kfree(oa_config); } -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + u32 i; - free_oa_config(dev_priv, oa_config); + for (i = 0; i < n_regs; i++) { + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min(n_regs - i, + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; } -static int get_oa_config(struct drm_i915_private *dev_priv, -int metrics_set, -struct i915_oa_config **out_config) +static int
Re: [Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily
Quoting Lionel Landwerlin (2018-10-08 16:18:21) > Here we introduce a mechanism by which the execbuf part of the i915 > driver will be able to request that a batch buffer containing the > programming for a particular OA config be created. > > We'll execute these OA configuration buffers right before executing a > set of userspace commands so that a particular user batchbuffer be > executed with a given OA configuration. > > This mechanism essentially allows the userspace driver to go through > several OA configuration without having to open/close the i915/perf > stream. > > Signed-off-by: Lionel Landwerlin > --- > drivers/gpu/drm/i915/i915_drv.h | 22 ++- > drivers/gpu/drm/i915/i915_perf.c | 195 ++ > drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + > 3 files changed, 187 insertions(+), 31 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 2264b30ce51a..a35715cd7608 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1378,6 +1378,10 @@ struct i915_oa_config { > struct attribute *attrs[2]; > struct device_attribute sysfs_metric_id; > > + struct i915_vma *vma; > + > + struct list_head vma_link; > + > atomic_t ref_count; > }; > > @@ -1979,11 +1983,21 @@ struct drm_i915_private { > struct mutex metrics_lock; > > /* > -* List of dynamic configurations, you need to hold > -* dev_priv->perf.metrics_lock to access it. > +* List of dynamic configurations (struct i915_oa_config), you > +* need to hold dev_priv->perf.metrics_lock to access it. > */ > struct idr metrics_idr; > > + /* > +* List of dynamic configurations (struct i915_oa_config) > +* which have an allocated buffer in GGTT for reconfiguration, > +* you need to hold dev_priv->perf.metrics_lock to access it. > +* Elements are added to the list lazilly on execbuf (when a > +* particular configuration is requested). The list is freed > +* upon closing the perf stream. > +*/ > + struct list_head metrics_buffers; > + > /* > * Lock associated with anything below within this structure > * except exclusive_stream. > @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device > *dev, void *data, > void i915_oa_init_reg_state(struct intel_engine_cs *engine, > struct i915_gem_context *ctx, > uint32_t *reg_state); > +int i915_perf_get_oa_config(struct drm_i915_private *i915, > + int metrics_set, > + struct i915_oa_config **out_config, > + struct i915_vma **out_vma); > > /* i915_gem_evict.c */ > int __must_check i915_gem_evict_something(struct i915_address_space *vm, > diff --git a/drivers/gpu/drm/i915/i915_perf.c > b/drivers/gpu/drm/i915/i915_perf.c > index e2a96b6844fe..39c5b44862d4 100644 > --- a/drivers/gpu/drm/i915/i915_perf.c > +++ b/drivers/gpu/drm/i915/i915_perf.c > @@ -364,9 +364,16 @@ struct perf_open_properties { > int oa_period_exponent; > }; > > -static void free_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static void put_oa_config(struct i915_oa_config *oa_config) > { > + if (!atomic_dec_and_test(&oa_config->ref_count)) > + return; > + > + if (oa_config->vma) { > + list_del(&oa_config->vma_link); > + i915_vma_put(oa_config->vma); > + } > + > if (!PTR_ERR(oa_config->flex_regs)) > kfree(oa_config->flex_regs); > if (!PTR_ERR(oa_config->b_counter_regs)) > @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private > *dev_priv, > kfree(oa_config); > } > > -static void put_oa_config(struct drm_i915_private *dev_priv, > - struct i915_oa_config *oa_config) > +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 > n_regs) > { > - if (!atomic_dec_and_test(&oa_config->ref_count)) > - return; > + u32 i; > > - free_oa_config(dev_priv, oa_config); > + for (i = 0; i < n_regs; i++) { > + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { > + u32 n_lri = min(n_regs - i, > + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); > + > + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); > + } > + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); > + *cs++ = reg_data[i].value; > + } > + > + return cs; > } > > -static int get_oa_
[Intel-gfx] [RFC PATCH 2/3] drm/i915/perf: allow for CS OA configs to be created lazily
Here we introduce a mechanism by which the execbuf part of the i915 driver will be able to request that a batch buffer containing the programming for a particular OA config be created. We'll execute these OA configuration buffers right before executing a set of userspace commands so that a particular user batchbuffer be executed with a given OA configuration. This mechanism essentially allows the userspace driver to go through several OA configuration without having to open/close the i915/perf stream. Signed-off-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_drv.h | 22 ++- drivers/gpu/drm/i915/i915_perf.c | 195 ++ drivers/gpu/drm/i915/intel_gpu_commands.h | 1 + 3 files changed, 187 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2264b30ce51a..a35715cd7608 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1378,6 +1378,10 @@ struct i915_oa_config { struct attribute *attrs[2]; struct device_attribute sysfs_metric_id; + struct i915_vma *vma; + + struct list_head vma_link; + atomic_t ref_count; }; @@ -1979,11 +1983,21 @@ struct drm_i915_private { struct mutex metrics_lock; /* -* List of dynamic configurations, you need to hold -* dev_priv->perf.metrics_lock to access it. +* List of dynamic configurations (struct i915_oa_config), you +* need to hold dev_priv->perf.metrics_lock to access it. */ struct idr metrics_idr; + /* +* List of dynamic configurations (struct i915_oa_config) +* which have an allocated buffer in GGTT for reconfiguration, +* you need to hold dev_priv->perf.metrics_lock to access it. +* Elements are added to the list lazilly on execbuf (when a +* particular configuration is requested). The list is freed +* upon closing the perf stream. +*/ + struct list_head metrics_buffers; + /* * Lock associated with anything below within this structure * except exclusive_stream. @@ -3315,6 +3329,10 @@ int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data, void i915_oa_init_reg_state(struct intel_engine_cs *engine, struct i915_gem_context *ctx, uint32_t *reg_state); +int i915_perf_get_oa_config(struct drm_i915_private *i915, + int metrics_set, + struct i915_oa_config **out_config, + struct i915_vma **out_vma); /* i915_gem_evict.c */ int __must_check i915_gem_evict_something(struct i915_address_space *vm, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index e2a96b6844fe..39c5b44862d4 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -364,9 +364,16 @@ struct perf_open_properties { int oa_period_exponent; }; -static void free_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static void put_oa_config(struct i915_oa_config *oa_config) { + if (!atomic_dec_and_test(&oa_config->ref_count)) + return; + + if (oa_config->vma) { + list_del(&oa_config->vma_link); + i915_vma_put(oa_config->vma); + } + if (!PTR_ERR(oa_config->flex_regs)) kfree(oa_config->flex_regs); if (!PTR_ERR(oa_config->b_counter_regs)) @@ -376,38 +383,152 @@ static void free_oa_config(struct drm_i915_private *dev_priv, kfree(oa_config); } -static void put_oa_config(struct drm_i915_private *dev_priv, - struct i915_oa_config *oa_config) +static u32 *write_cs_mi_lri(u32 *cs, const struct i915_oa_reg *reg_data, u32 n_regs) { - if (!atomic_dec_and_test(&oa_config->ref_count)) - return; + u32 i; - free_oa_config(dev_priv, oa_config); + for (i = 0; i < n_regs; i++) { + if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { + u32 n_lri = min(n_regs - i, + (u32) MI_LOAD_REGISTER_IMM_MAX_REGS); + + *cs++ = MI_LOAD_REGISTER_IMM(n_lri); + } + *cs++ = i915_mmio_reg_offset(reg_data[i].addr); + *cs++ = reg_data[i].value; + } + + return cs; } -static int get_oa_config(struct drm_i915_private *dev_priv, -int metrics_set, -struct i915_oa_config **out_config) +static int alloc_oa_config_buffer(struct drm_i915_private *i915, + struct i915_oa_config *oa_config) { + struct