Re: [Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream

2019-09-09 Thread kbuild test robot
Hi Lionel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc8 next-20190904]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190907-052009
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: x86_64-randconfig-f004-201936 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init':
>> drivers/gpu/drm/i915/i915_perf.c:2697:3: error: ignoring return value of 
>> 'i915_active_request_retire', declared with attribute warn_unused_result 
>> [-Werror=unused-result]
  i915_active_request_retire(&stream->active_config_rq, 0,
  ^~~~
&stream->config_mutex);
~~
   cc1: all warnings being treated as errors

vim +/i915_active_request_retire +2697 drivers/gpu/drm/i915/i915_perf.c

  2556  
  2557  /**
  2558   * i915_oa_stream_init - validate combined props for OA stream and init
  2559   * @stream: An i915 perf stream
  2560   * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
  2561   * @props: The property state that configures stream (individually 
validated)
  2562   *
  2563   * While read_properties_unlocked() validates properties in isolation it
  2564   * doesn't ensure that the combination necessarily makes sense.
  2565   *
  2566   * At this point it has been determined that userspace wants a stream of
  2567   * OA metrics, but still we need to further validate the combined
  2568   * properties are OK.
  2569   *
  2570   * If the configuration makes sense then we can allocate memory for
  2571   * a circular OA buffer and apply the requested metric set 
configuration.
  2572   *
  2573   * Returns: zero on success or a negative error code.
  2574   */
  2575  static int i915_oa_stream_init(struct i915_perf_stream *stream,
  2576 struct drm_i915_perf_open_param *param,
  2577 struct perf_open_properties *props)
  2578  {
  2579  struct drm_i915_private *dev_priv = stream->dev_priv;
  2580  int format_size;
  2581  int ret;
  2582  
  2583  /* If the sysfs metrics/ directory wasn't registered for some
  2584   * reason then don't let userspace try their luck with config
  2585   * IDs
  2586   */
  2587  if (!dev_priv->perf.metrics_kobj) {
  2588  DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
  2589  return -EINVAL;
  2590  }
  2591  
  2592  if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
  2593  DRM_DEBUG("Only OA report sampling supported\n");
  2594  return -EINVAL;
  2595  }
  2596  
  2597  if (!dev_priv->perf.ops.enable_metric_set) {
  2598  DRM_DEBUG("OA unit not supported\n");
  2599  return -ENODEV;
  2600  }
  2601  
  2602  /* To avoid the complexity of having to accurately filter
  2603   * counter reports and marshal to the appropriate client
  2604   * we currently only allow exclusive access
  2605   */
  2606  if (dev_priv->perf.exclusive_stream) {
  2607  DRM_DEBUG("OA unit already in use\n");
  2608  return -EBUSY;
  2609  }
  2610  
  2611  if (!props->oa_format) {
  2612  DRM_DEBUG("OA report format not specified\n");
  2613  return -EINVAL;
  2614  }
  2615  
  2616  mutex_init(&stream->config_mutex);
  2617  
  2618  stream->sample_size = sizeof(struct 
drm_i915_perf_record_header);
  2619  
  2620  format_size = dev_priv->perf.oa_formats[props->oa_format].size;
  2621  
  2622  stream->engine = props->engine;
  2623  
  2624  INIT_ACTIVE_REQUEST(&stream->active_config_rq,
  2625  &stream->config_mutex);
  2626  
  2627  stream->sample_flags |= SAMPLE_OA_REPORT;
  2628  stream->sample_size += format_size;
  2629  
  2630  stream->oa_buffer.format_size = format_size;
  2631  if (WARN_ON(stream->oa_buffer.format_size == 0))
  2632  return -EINVAL;
  2633  
  2634  stream->oa_buffer.format =
  2635  dev_priv->perf.oa_formats[props->oa_format].format;
  2636  
  2637  stream->periodic = props->oa_periodic;
  2638  if (stream->periodic)
  2639  stream->period_exponent = props

Re: [Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream

2019-09-06 Thread kbuild test robot
Hi Lionel,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on drm-intel/for-linux-next]
[cannot apply to v5.3-rc7 next-20190904]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190907-052009
base:   git://anongit.freedesktop.org/drm-intel for-linux-next
config: i386-randconfig-b001-201935 (attached as .config)
compiler: gcc-7 (Debian 7.4.0-11) 7.4.0
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

   In file included from :0:0:
   drivers/gpu/drm/i915/i915_perf_types.h:25:2: error: unknown type name 
'i915_reg_t'
 i915_reg_t addr;
 ^~
   drivers/gpu/drm/i915/i915_perf_types.h:32:12: error: 'UUID_STRING_LEN' 
undeclared here (not in a function); did you mean '_LINUX_STRING_H_'?
 char uuid[UUID_STRING_LEN + 1];
   ^~~
   _LINUX_STRING_H_
   drivers/gpu/drm/i915/i915_perf_types.h:75:6: error: unknown type name 
'poll_table'; did you mean 'poll_to_key'?
 poll_table *wait);
 ^~
 poll_to_key
   drivers/gpu/drm/i915/i915_perf_types.h:128:2: error: unknown type name 
'intel_wakeref_t'
 intel_wakeref_t wakeref;
 ^~~
>> drivers/gpu/drm/i915/i915_perf_types.h:188:29: error: field 
>> 'active_config_rq' has incomplete type
 struct i915_active_request active_config_rq;
^~~~

vim +/active_config_rq +188 drivers/gpu/drm/i915/i915_perf_types.h

50  
51  /**
52   * struct i915_perf_stream_ops - the OPs to support a specific stream 
type
53   */
54  struct i915_perf_stream_ops {
55  /**
56   * @enable: Enables the collection of HW samples, either in 
response to
57   * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is 
opened
58   * without `I915_PERF_FLAG_DISABLED`.
59   */
60  void (*enable)(struct i915_perf_stream *stream);
61  
62  /**
63   * @disable: Disables the collection of HW samples, either in 
response
64   * to `I915_PERF_IOCTL_DISABLE` or implicitly called before 
destroying
65   * the stream.
66   */
67  void (*disable)(struct i915_perf_stream *stream);
68  
69  /**
70   * @poll_wait: Call poll_wait, passing a wait queue that will 
be woken
71   * once there is something ready to read() for the stream
72   */
73  void (*poll_wait)(struct i915_perf_stream *stream,
74struct file *file,
  > 75poll_table *wait);
76  
77  /**
78   * @wait_unlocked: For handling a blocking read, wait until 
there is
79   * something to ready to read() for the stream. E.g. wait on 
the same
80   * wait queue that would be passed to poll_wait().
81   */
82  int (*wait_unlocked)(struct i915_perf_stream *stream);
83  
84  /**
85   * @read: Copy buffered metrics as records to userspace
86   * **buf**: the userspace, destination buffer
87   * **count**: the number of bytes to copy, requested by 
userspace
88   * **offset**: zero at the start of the read, updated as the 
read
89   * proceeds, it represents how many bytes have been copied so 
far and
90   * the buffer offset for copying the next record.
91   *
92   * Copy as many buffered i915 perf samples and records for this 
stream
93   * to userspace as will fit in the given buffer.
94   *
95   * Only write complete records; returning -%ENOSPC if there 
isn't room
96   * for a complete record.
97   *
98   * Return any error condition that results in a short read such 
as
99   * -%ENOSPC or -%EFAULT, even though these may be squashed 
before
   100   * returning to userspace.
   101   */
   102  int (*read)(struct i915_perf_stream *stream,
   103  char __user *buf,
   104  size_t count,
   105  size_t *offset);
   106  
   107  /**
   108   * @destroy: Cleanup any stream specific resources.
   109   *
   110   * The stream will always be disabled before this is called.
   111   */
   112  void (*destroy)(struct i915_perf_stream *stream);
   113  };
   114  
   115  /**
   116   * struct i915_perf_stream - state for a single open stream FD
   117   */
   118  struct i915_perf_stream {
   119  

[Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream

2019-09-06 Thread Lionel Landwerlin
We haven't run into issues with programming the global OA/NOA
registers configuration from CPU so far, but HW engineers actually
recommend doing this from the command streamer. On TGL in particular
one of the clock domain in which some of that programming goes might
not be powered when we poke things from the CPU.

Since we have a command buffer prepared for the execbuffer side of
things, we can reuse that approach here too.

This also allows us to significantly reduce the amount of time we hold
the main lock.

v2: Drop the global lock as much as possible

v3: Take global lock to pin global

v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel)

v5: Move locking to the stream (Lionel)

v6: Move active reconfiguration request into i915_perf_stream (Lionel)

v7: Pin VMA outside request creation (Chris)
Lock VMA before move to active (Chris)

v8: Fix double free on stream->initial_oa_config_bo (Lionel)
Don't allow interruption when waiting on active config request
(Lionel)

Signed-off-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_perf.c   | 170 -
 drivers/gpu/drm/i915/i915_perf_types.h |  13 +-
 2 files changed, 122 insertions(+), 61 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f2b778d84b52..8e3532518139 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1558,18 +1558,23 @@ free_oa_configs(struct i915_perf_stream *stream)
 static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
 {
struct drm_i915_private *dev_priv = stream->dev_priv;
+   int err;
 
BUG_ON(stream != dev_priv->perf.exclusive_stream);
 
-   /*
-* Unset exclusive_stream first, it will be checked while disabling
-* the metric set on gen8+.
-*/
mutex_lock(&dev_priv->drm.struct_mutex);
-   dev_priv->perf.exclusive_stream = NULL;
+   mutex_lock(&stream->config_mutex);
dev_priv->perf.ops.disable_metric_set(stream);
+   err = i915_active_request_retire(&stream->active_config_rq, 0,
+&stream->config_mutex);
+   mutex_unlock(&stream->config_mutex);
+   dev_priv->perf.exclusive_stream = NULL;
mutex_unlock(&dev_priv->drm.struct_mutex);
 
+   if (err)
+   DRM_ERROR("Failed to disable perf stream\n");
+
+
free_oa_buffer(stream);
free_noa_wait(stream);
 
@@ -1795,6 +1800,10 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
return PTR_ERR(bo);
}
 
+   ret = i915_mutex_lock_interruptible(&i915->drm);
+   if (ret)
+   goto err_unref;
+
/*
 * We pin in GGTT because we jump into this buffer now because
 * multiple OA config BOs will have a jump to this address and it
@@ -1802,10 +1811,13 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
 */
vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0);
if (IS_ERR(vma)) {
+   mutex_unlock(&i915->drm.struct_mutex);
ret = PTR_ERR(vma);
goto err_unref;
}
 
+   mutex_unlock(&i915->drm.struct_mutex);
+
batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB);
if (IS_ERR(batch)) {
ret = PTR_ERR(batch);
@@ -1939,7 +1951,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
return 0;
 
 err_unpin:
-   __i915_vma_unpin(vma);
+   mutex_lock(&i915->drm.struct_mutex);
+   i915_vma_unpin_and_release(&vma, 0);
+   mutex_unlock(&i915->drm.struct_mutex);
 
 err_unref:
i915_gem_object_put(bo);
@@ -1947,50 +1961,73 @@ static int alloc_noa_wait(struct i915_perf_stream 
*stream)
return ret;
 }
 
-static void config_oa_regs(struct drm_i915_private *dev_priv,
-  const struct i915_oa_reg *regs,
-  u32 n_regs)
+static int emit_oa_config(struct drm_i915_private *i915,
+ struct i915_perf_stream *stream)
 {
-   u32 i;
+   struct i915_request *rq;
+   struct i915_vma *vma;
+   u32 *cs;
+   int err;
 
-   for (i = 0; i < n_regs; i++) {
-   const struct i915_oa_reg *reg = regs + i;
+   lockdep_assert_held(&stream->config_mutex);
+
+   vma = i915_vma_instance(stream->initial_oa_config_bo,
+   &stream->engine->gt->ggtt->vm, NULL);
+   if (unlikely(IS_ERR(vma)))
+   return PTR_ERR(vma);
 
-   I915_WRITE(reg->addr, reg->value);
+   err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+   if (err)
+   goto err_vma_unpin;
+
+   rq = i915_request_create(stream->engine->kernel_context);
+   if (IS_ERR(rq)) {
+   err = PTR_ERR(rq);
+   goto err_add_request;
}
-}
 
-static void delay_after_mux(void)
-{
-   /*
-* It apparently takes a fairly long time for a