Re: [Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

2019-09-02 Thread Lionel Landwerlin

On 30/08/2019 18:48, Chris Wilson wrote:

Quoting Lionel Landwerlin (2019-08-30 15:47:23)

  err_unpin:
-   __i915_vma_unpin(vma);
+   mutex_lock(&i915->drm.struct_mutex);
+   i915_vma_unpin_and_release(&vma, 0);
+   mutex_unlock(&i915->drm.struct_mutex);

Strangely unpin_and_release() doesn't need the mutex. But I can clean
that up later.

  
  err_unref:

 i915_gem_object_put(bo);
@@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
 return ret;
  }
  
-static void config_oa_regs(struct drm_i915_private *dev_priv,

-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static int emit_oa_config(struct drm_i915_private *i915,
+ struct i915_perf_stream *stream)
  {
-   u32 i;
+   struct i915_request *rq;
+   struct i915_vma *vma;
+   u32 *cs;
+   int err;
  
-   for (i = 0; i < n_regs; i++) {

-   const struct i915_oa_reg *reg = regs + i;
+   lockdep_assert_held(&stream->config_mutex);
  
-   I915_WRITE(reg->addr, reg->value);

+   rq = i915_request_create(stream->engine->kernel_context);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   err = i915_active_request_set(&stream->active_config_rq,
+ rq);
+   if (err)
+   goto err_add_request;
+
+   vma = i915_vma_instance(stream->initial_oa_config_bo,
+   &i915->ggtt.vm, NULL);

Safer with stream->engine->gt->ggtt.vm


+   if (unlikely(IS_ERR(vma))) {
+   err = PTR_ERR(vma);
+   goto err_add_request;
 }
-}
  
-static void delay_after_mux(void)

-{
-   /*
-* It apparently takes a fairly long time for a new MUX
-* configuration to be be applied after these register writes.
-* This delay duration was derived empirically based on the
-* render_basic config but hopefully it covers the maximum
-* configuration latency.
-*
-* As a fallback, the checks in _append_oa_reports() to skip
-* invalid OA reports do also seem to work to discard reports
-* generated before this config has completed - albeit not
-* silently.
-*
-* Unfortunately this is essentially a magic number, since we
-* don't currently know of a reliable mechanism for predicting
-* how long the MUX config will take to apply and besides
-* seeing invalid reports we don't know of a reliable way to
-* explicitly check that the MUX config has landed.
-*
-* It's even possible we've miss characterized the underlying
-* problem - it just seems like the simplest explanation why
-* a delay at this location would mitigate any invalid reports.
-*/
-   usleep_range(15000, 2);
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err)
+   goto err_add_request;

Don't pin inside a request. do the pining before i915_request_create().
One day, we may need to allocate a request to do the pin.

Be safe,

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
(yes, we need a better wrapper here)
if (err)

+   err = i915_vma_move_to_active(vma, rq, 0);

i915_vma_unlock(vma);

+   if (err)
+   goto err_vma_unpin;
+



@@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 stream->ops = &i915_oa_stream_ops;
 dev_priv->perf.exclusive_stream = stream;
  
+   mutex_lock(&stream->config_mutex);

 ret = dev_priv->perf.ops.enable_metric_set(stream);
 if (ret) {
 DRM_DEBUG("Unable to enable metric set\n");
-   goto err_enable;
+   /*
+* Ignore the return value since we already have an error from
+* the enable vfunc.
+*/
+   i915_active_request_retire(&stream->active_config_rq,
+  &stream->config_mutex);
+   } else {
+   ret = i915_active_request_retire(&stream->active_config_rq,
+&stream->config_mutex);

This function doesn't exist anymore. It's basically just waiting for the
old request. Why do you need it? (Your request flow is otherwise ordered.)



I don't want to enable the OA reports to be written into the OA buffer 
until I know the configuration work has completed.


Otherwise it would write invalid/unconfigured data.





-   DRM_DEBUG("opening stream oa config uuid=%s\n", 
stream->oa_config->uuid);
-
+   mutex_unlock(&stream->config_mutex);
 mutex_unlock(&dev_priv->drm.struct_mutex);
  
+   i915_gem_object_put(stream->initial_oa_config_bo);

+   stream->initial_oa_config_bo = NULL;
+   if (ret)
+   goto err_enable;

Is it because of this err that may end up freei

Re: [Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

2019-08-30 Thread kbuild test robot
Hi Lionel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-h002-201934 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init':
>> drivers/gpu/drm/i915/i915_perf.c:2695:3: error: ignoring return value of 
>> 'i915_active_request_retire', declared with attribute warn_unused_result 
>> [-Werror=unused-result]
  i915_active_request_retire(&stream->active_config_rq,
  ^
&stream->config_mutex);
~~
   cc1: all warnings being treated as errors

vim +/i915_active_request_retire +2695 drivers/gpu/drm/i915/i915_perf.c

  2553  
  2554  /**
  2555   * i915_oa_stream_init - validate combined props for OA stream and init
  2556   * @stream: An i915 perf stream
  2557   * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
  2558   * @props: The property state that configures stream (individually 
validated)
  2559   *
  2560   * While read_properties_unlocked() validates properties in isolation it
  2561   * doesn't ensure that the combination necessarily makes sense.
  2562   *
  2563   * At this point it has been determined that userspace wants a stream of
  2564   * OA metrics, but still we need to further validate the combined
  2565   * properties are OK.
  2566   *
  2567   * If the configuration makes sense then we can allocate memory for
  2568   * a circular OA buffer and apply the requested metric set 
configuration.
  2569   *
  2570   * Returns: zero on success or a negative error code.
  2571   */
  2572  static int i915_oa_stream_init(struct i915_perf_stream *stream,
  2573 struct drm_i915_perf_open_param *param,
  2574 struct perf_open_properties *props)
  2575  {
  2576  struct drm_i915_private *dev_priv = stream->dev_priv;
  2577  int format_size;
  2578  int ret;
  2579  
  2580  /* If the sysfs metrics/ directory wasn't registered for some
  2581   * reason then don't let userspace try their luck with config
  2582   * IDs
  2583   */
  2584  if (!dev_priv->perf.metrics_kobj) {
  2585  DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
  2586  return -EINVAL;
  2587  }
  2588  
  2589  if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
  2590  DRM_DEBUG("Only OA report sampling supported\n");
  2591  return -EINVAL;
  2592  }
  2593  
  2594  if (!dev_priv->perf.ops.enable_metric_set) {
  2595  DRM_DEBUG("OA unit not supported\n");
  2596  return -ENODEV;
  2597  }
  2598  
  2599  /* To avoid the complexity of having to accurately filter
  2600   * counter reports and marshal to the appropriate client
  2601   * we currently only allow exclusive access
  2602   */
  2603  if (dev_priv->perf.exclusive_stream) {
  2604  DRM_DEBUG("OA unit already in use\n");
  2605  return -EBUSY;
  2606  }
  2607  
  2608  if (!props->oa_format) {
  2609  DRM_DEBUG("OA report format not specified\n");
  2610  return -EINVAL;
  2611  }
  2612  
  2613  mutex_init(&stream->config_mutex);
  2614  
  2615  stream->sample_size = sizeof(struct 
drm_i915_perf_record_header);
  2616  
  2617  format_size = dev_priv->perf.oa_formats[props->oa_format].size;
  2618  
  2619  stream->engine = props->engine;
  2620  
  2621  mutex_init(&stream->config_mutex);
  2622  INIT_ACTIVE_REQUEST(&stream->active_config_rq,
  2623  &stream->config_mutex);
  2624  
  2625  stream->sample_flags |= SAMPLE_OA_REPORT;
  2626  stream->sample_size += format_size;
  2627  
  2628  stream->oa_buffer.format_size = format_size;
  2629  if (WARN_ON(stream->oa_buffer.format_size == 0))
  2630  return -EINVAL;
  2631  
  2632  stream->oa_buffer.format =
  2633  dev_priv->perf.oa_formats[props->oa_format].format;
  2634  
  2635  stream->periodic = props->oa_periodic;
  2636  if (stream->periodic)
  2637

Re: [Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

2019-08-30 Thread kbuild test robot
Hi Lionel,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on drm-intel/for-linux-next]
[cannot apply to v5.3-rc6 next-20190830]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190831-033234
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-allyesconfig (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All warnings (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init':
>> drivers/gpu/drm/i915/i915_perf.c:2695:3: warning: ignoring return value of 
>> 'i915_active_request_retire', declared with attribute warn_unused_result 
>> [-Wunused-result]
  i915_active_request_retire(&stream->active_config_rq,
  ^
&stream->config_mutex);
~~

vim +/i915_active_request_retire +2695 drivers/gpu/drm/i915/i915_perf.c

  2553  
  2554  /**
  2555   * i915_oa_stream_init - validate combined props for OA stream and init
  2556   * @stream: An i915 perf stream
  2557   * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
  2558   * @props: The property state that configures stream (individually 
validated)
  2559   *
  2560   * While read_properties_unlocked() validates properties in isolation it
  2561   * doesn't ensure that the combination necessarily makes sense.
  2562   *
  2563   * At this point it has been determined that userspace wants a stream of
  2564   * OA metrics, but still we need to further validate the combined
  2565   * properties are OK.
  2566   *
  2567   * If the configuration makes sense then we can allocate memory for
  2568   * a circular OA buffer and apply the requested metric set 
configuration.
  2569   *
  2570   * Returns: zero on success or a negative error code.
  2571   */
  2572  static int i915_oa_stream_init(struct i915_perf_stream *stream,
  2573 struct drm_i915_perf_open_param *param,
  2574 struct perf_open_properties *props)
  2575  {
  2576  struct drm_i915_private *dev_priv = stream->dev_priv;
  2577  int format_size;
  2578  int ret;
  2579  
  2580  /* If the sysfs metrics/ directory wasn't registered for some
  2581   * reason then don't let userspace try their luck with config
  2582   * IDs
  2583   */
  2584  if (!dev_priv->perf.metrics_kobj) {
  2585  DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
  2586  return -EINVAL;
  2587  }
  2588  
  2589  if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
  2590  DRM_DEBUG("Only OA report sampling supported\n");
  2591  return -EINVAL;
  2592  }
  2593  
  2594  if (!dev_priv->perf.ops.enable_metric_set) {
  2595  DRM_DEBUG("OA unit not supported\n");
  2596  return -ENODEV;
  2597  }
  2598  
  2599  /* To avoid the complexity of having to accurately filter
  2600   * counter reports and marshal to the appropriate client
  2601   * we currently only allow exclusive access
  2602   */
  2603  if (dev_priv->perf.exclusive_stream) {
  2604  DRM_DEBUG("OA unit already in use\n");
  2605  return -EBUSY;
  2606  }
  2607  
  2608  if (!props->oa_format) {
  2609  DRM_DEBUG("OA report format not specified\n");
  2610  return -EINVAL;
  2611  }
  2612  
  2613  mutex_init(&stream->config_mutex);
  2614  
  2615  stream->sample_size = sizeof(struct 
drm_i915_perf_record_header);
  2616  
  2617  format_size = dev_priv->perf.oa_formats[props->oa_format].size;
  2618  
  2619  stream->engine = props->engine;
  2620  
  2621  mutex_init(&stream->config_mutex);
  2622  INIT_ACTIVE_REQUEST(&stream->active_config_rq,
  2623  &stream->config_mutex);
  2624  
  2625  stream->sample_flags |= SAMPLE_OA_REPORT;
  2626  stream->sample_size += format_size;
  2627  
  2628  stream->oa_buffer.format_size = format_size;
  2629  if (WARN_ON(stream->oa_buffer.format_size == 0))
  2630  return -EINVAL;
  2631  
  2632  stream->oa_buffer.format =
  2633  dev_priv->perf.oa_formats[props->oa_format].format;
  2634  
  2635  stream->periodic = props->oa_periodic;
  2636  if (stream->periodic)
  2637  stream->period_exponent = props->oa_p

Re: [Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

2019-08-30 Thread Lionel Landwerlin

On 30/08/2019 18:48, Chris Wilson wrote:

Quoting Lionel Landwerlin (2019-08-30 15:47:23)

  err_unpin:
-   __i915_vma_unpin(vma);
+   mutex_lock(&i915->drm.struct_mutex);
+   i915_vma_unpin_and_release(&vma, 0);
+   mutex_unlock(&i915->drm.struct_mutex);

Strangely unpin_and_release() doesn't need the mutex. But I can clean
that up later.

  
  err_unref:

 i915_gem_object_put(bo);
@@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
 return ret;
  }
  
-static void config_oa_regs(struct drm_i915_private *dev_priv,

-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static int emit_oa_config(struct drm_i915_private *i915,
+ struct i915_perf_stream *stream)
  {
-   u32 i;
+   struct i915_request *rq;
+   struct i915_vma *vma;
+   u32 *cs;
+   int err;
  
-   for (i = 0; i < n_regs; i++) {

-   const struct i915_oa_reg *reg = regs + i;
+   lockdep_assert_held(&stream->config_mutex);
  
-   I915_WRITE(reg->addr, reg->value);

+   rq = i915_request_create(stream->engine->kernel_context);
+   if (IS_ERR(rq))
+   return PTR_ERR(rq);
+
+   err = i915_active_request_set(&stream->active_config_rq,
+ rq);
+   if (err)
+   goto err_add_request;
+
+   vma = i915_vma_instance(stream->initial_oa_config_bo,
+   &i915->ggtt.vm, NULL);

Safer with stream->engine->gt->ggtt.vm


+   if (unlikely(IS_ERR(vma))) {
+   err = PTR_ERR(vma);
+   goto err_add_request;
 }
-}
  
-static void delay_after_mux(void)

-{
-   /*
-* It apparently takes a fairly long time for a new MUX
-* configuration to be be applied after these register writes.
-* This delay duration was derived empirically based on the
-* render_basic config but hopefully it covers the maximum
-* configuration latency.
-*
-* As a fallback, the checks in _append_oa_reports() to skip
-* invalid OA reports do also seem to work to discard reports
-* generated before this config has completed - albeit not
-* silently.
-*
-* Unfortunately this is essentially a magic number, since we
-* don't currently know of a reliable mechanism for predicting
-* how long the MUX config will take to apply and besides
-* seeing invalid reports we don't know of a reliable way to
-* explicitly check that the MUX config has landed.
-*
-* It's even possible we've miss characterized the underlying
-* problem - it just seems like the simplest explanation why
-* a delay at this location would mitigate any invalid reports.
-*/
-   usleep_range(15000, 2);
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err)
+   goto err_add_request;

Don't pin inside a request. do the pining before i915_request_create().
One day, we may need to allocate a request to do the pin.

Be safe,

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
(yes, we need a better wrapper here)
if (err)

+   err = i915_vma_move_to_active(vma, rq, 0);

i915_vma_unlock(vma);

+   if (err)
+   goto err_vma_unpin;
+



@@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct i915_perf_stream 
*stream,
 stream->ops = &i915_oa_stream_ops;
 dev_priv->perf.exclusive_stream = stream;
  
+   mutex_lock(&stream->config_mutex);

 ret = dev_priv->perf.ops.enable_metric_set(stream);
 if (ret) {
 DRM_DEBUG("Unable to enable metric set\n");
-   goto err_enable;
+   /*
+* Ignore the return value since we already have an error from
+* the enable vfunc.
+*/
+   i915_active_request_retire(&stream->active_config_rq,
+  &stream->config_mutex);
+   } else {
+   ret = i915_active_request_retire(&stream->active_config_rq,
+&stream->config_mutex);

This function doesn't exist anymore. It's basically just waiting for the
old request. Why do you need it? (Your request flow is otherwise ordered.)


-   DRM_DEBUG("opening stream oa config uuid=%s\n", 
stream->oa_config->uuid);
-
+   mutex_unlock(&stream->config_mutex);
 mutex_unlock(&dev_priv->drm.struct_mutex);
  
+   i915_gem_object_put(stream->initial_oa_config_bo);

+   stream->initial_oa_config_bo = NULL;
+   if (ret)
+   goto err_enable;

Is it because of this err that may end up freeing the stream? I would
expect a similar wait-before-free on stream destroy.



There meant to be a wait-before-free at destroy. Looks like I screw up 
somewhere...




  In which case

Re: [Intel-gfx] [PATCH v12 08/11] drm/i915/perf: execute OA configuration from command stream

2019-08-30 Thread Chris Wilson
Quoting Lionel Landwerlin (2019-08-30 15:47:23)
>  err_unpin:
> -   __i915_vma_unpin(vma);
> +   mutex_lock(&i915->drm.struct_mutex);
> +   i915_vma_unpin_and_release(&vma, 0);
> +   mutex_unlock(&i915->drm.struct_mutex);

Strangely unpin_and_release() doesn't need the mutex. But I can clean
that up later.

>  
>  err_unref:
> i915_gem_object_put(bo);
> @@ -1947,50 +1961,69 @@ static int alloc_noa_wait(struct i915_perf_stream 
> *stream)
> return ret;
>  }
>  
> -static void config_oa_regs(struct drm_i915_private *dev_priv,
> -  const struct i915_oa_reg *regs,
> -  u32 n_regs)
> +static int emit_oa_config(struct drm_i915_private *i915,
> + struct i915_perf_stream *stream)
>  {
> -   u32 i;
> +   struct i915_request *rq;
> +   struct i915_vma *vma;
> +   u32 *cs;
> +   int err;
>  
> -   for (i = 0; i < n_regs; i++) {
> -   const struct i915_oa_reg *reg = regs + i;
> +   lockdep_assert_held(&stream->config_mutex);
>  
> -   I915_WRITE(reg->addr, reg->value);
> +   rq = i915_request_create(stream->engine->kernel_context);
> +   if (IS_ERR(rq))
> +   return PTR_ERR(rq);
> +
> +   err = i915_active_request_set(&stream->active_config_rq,
> + rq);
> +   if (err)
> +   goto err_add_request;
> +
> +   vma = i915_vma_instance(stream->initial_oa_config_bo,
> +   &i915->ggtt.vm, NULL);

Safer with stream->engine->gt->ggtt.vm

> +   if (unlikely(IS_ERR(vma))) {
> +   err = PTR_ERR(vma);
> +   goto err_add_request;
> }
> -}
>  
> -static void delay_after_mux(void)
> -{
> -   /*
> -* It apparently takes a fairly long time for a new MUX
> -* configuration to be be applied after these register writes.
> -* This delay duration was derived empirically based on the
> -* render_basic config but hopefully it covers the maximum
> -* configuration latency.
> -*
> -* As a fallback, the checks in _append_oa_reports() to skip
> -* invalid OA reports do also seem to work to discard reports
> -* generated before this config has completed - albeit not
> -* silently.
> -*
> -* Unfortunately this is essentially a magic number, since we
> -* don't currently know of a reliable mechanism for predicting
> -* how long the MUX config will take to apply and besides
> -* seeing invalid reports we don't know of a reliable way to
> -* explicitly check that the MUX config has landed.
> -*
> -* It's even possible we've miss characterized the underlying
> -* problem - it just seems like the simplest explanation why
> -* a delay at this location would mitigate any invalid reports.
> -*/
> -   usleep_range(15000, 2);
> +   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
> +   if (err)
> +   goto err_add_request;

Don't pin inside a request. do the pining before i915_request_create().
One day, we may need to allocate a request to do the pin.

Be safe,

i915_vma_lock(vma);
err = i915_request_await_object(rq, vma->obj, 0);
(yes, we need a better wrapper here)
if (err)
> +   err = i915_vma_move_to_active(vma, rq, 0);
i915_vma_unlock(vma);
> +   if (err)
> +   goto err_vma_unpin;
> +


> @@ -2658,16 +2684,31 @@ static int i915_oa_stream_init(struct 
> i915_perf_stream *stream,
> stream->ops = &i915_oa_stream_ops;
> dev_priv->perf.exclusive_stream = stream;
>  
> +   mutex_lock(&stream->config_mutex);
> ret = dev_priv->perf.ops.enable_metric_set(stream);
> if (ret) {
> DRM_DEBUG("Unable to enable metric set\n");
> -   goto err_enable;
> +   /*
> +* Ignore the return value since we already have an error from
> +* the enable vfunc.
> +*/
> +   i915_active_request_retire(&stream->active_config_rq,
> +  &stream->config_mutex);
> +   } else {
> +   ret = i915_active_request_retire(&stream->active_config_rq,
> +&stream->config_mutex);

This function doesn't exist anymore. It's basically just waiting for the
old request. Why do you need it? (Your request flow is otherwise ordered.)

> -   DRM_DEBUG("opening stream oa config uuid=%s\n", 
> stream->oa_config->uuid);
> -
> +   mutex_unlock(&stream->config_mutex);
> mutex_unlock(&dev_priv->drm.struct_mutex);
>  
> +   i915_gem_object_put(stream->initial_oa_config_bo);
> +   stream->initial_oa_config_bo = NULL;
> +   if (ret)
> +   goto err_enable;

Is it because of this err that may end up freeing the stream? I would
expect a similar wait