Re: [Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream
Hi Lionel, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-intel/for-linux-next] [cannot apply to v5.3-rc8 next-20190904] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190907-052009 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: x86_64-randconfig-f004-201936 (attached as .config) compiler: gcc-7 (Debian 7.4.0-11) 7.4.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): drivers/gpu/drm/i915/i915_perf.c: In function 'i915_oa_stream_init': >> drivers/gpu/drm/i915/i915_perf.c:2697:3: error: ignoring return value of >> 'i915_active_request_retire', declared with attribute warn_unused_result >> [-Werror=unused-result] i915_active_request_retire(&stream->active_config_rq, 0, ^~~~ &stream->config_mutex); ~~ cc1: all warnings being treated as errors vim +/i915_active_request_retire +2697 drivers/gpu/drm/i915/i915_perf.c 2556 2557 /** 2558 * i915_oa_stream_init - validate combined props for OA stream and init 2559 * @stream: An i915 perf stream 2560 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 2561 * @props: The property state that configures stream (individually validated) 2562 * 2563 * While read_properties_unlocked() validates properties in isolation it 2564 * doesn't ensure that the combination necessarily makes sense. 2565 * 2566 * At this point it has been determined that userspace wants a stream of 2567 * OA metrics, but still we need to further validate the combined 2568 * properties are OK. 2569 * 2570 * If the configuration makes sense then we can allocate memory for 2571 * a circular OA buffer and apply the requested metric set configuration. 2572 * 2573 * Returns: zero on success or a negative error code. 2574 */ 2575 static int i915_oa_stream_init(struct i915_perf_stream *stream, 2576 struct drm_i915_perf_open_param *param, 2577 struct perf_open_properties *props) 2578 { 2579 struct drm_i915_private *dev_priv = stream->dev_priv; 2580 int format_size; 2581 int ret; 2582 2583 /* If the sysfs metrics/ directory wasn't registered for some 2584 * reason then don't let userspace try their luck with config 2585 * IDs 2586 */ 2587 if (!dev_priv->perf.metrics_kobj) { 2588 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2589 return -EINVAL; 2590 } 2591 2592 if (!(props->sample_flags & SAMPLE_OA_REPORT)) { 2593 DRM_DEBUG("Only OA report sampling supported\n"); 2594 return -EINVAL; 2595 } 2596 2597 if (!dev_priv->perf.ops.enable_metric_set) { 2598 DRM_DEBUG("OA unit not supported\n"); 2599 return -ENODEV; 2600 } 2601 2602 /* To avoid the complexity of having to accurately filter 2603 * counter reports and marshal to the appropriate client 2604 * we currently only allow exclusive access 2605 */ 2606 if (dev_priv->perf.exclusive_stream) { 2607 DRM_DEBUG("OA unit already in use\n"); 2608 return -EBUSY; 2609 } 2610 2611 if (!props->oa_format) { 2612 DRM_DEBUG("OA report format not specified\n"); 2613 return -EINVAL; 2614 } 2615 2616 mutex_init(&stream->config_mutex); 2617 2618 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 2619 2620 format_size = dev_priv->perf.oa_formats[props->oa_format].size; 2621 2622 stream->engine = props->engine; 2623 2624 INIT_ACTIVE_REQUEST(&stream->active_config_rq, 2625 &stream->config_mutex); 2626 2627 stream->sample_flags |= SAMPLE_OA_REPORT; 2628 stream->sample_size += format_size; 2629 2630 stream->oa_buffer.format_size = format_size; 2631 if (WARN_ON(stream->oa_buffer.format_size == 0)) 2632 return -EINVAL; 2633 2634 stream->oa_buffer.format = 2635 dev_priv->perf.oa_formats[props->oa_format].format; 2636 2637 stream->periodic = props->oa_periodic; 2638 if (stream->periodic) 2639 stream->period_exponent = props
Re: [Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream
Hi Lionel, Thank you for the patch! Yet something to improve: [auto build test ERROR on drm-intel/for-linux-next] [cannot apply to v5.3-rc7 next-20190904] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Lionel-Landwerlin/drm-i915-Vulkan-performance-query-support/20190907-052009 base: git://anongit.freedesktop.org/drm-intel for-linux-next config: i386-randconfig-b001-201935 (attached as .config) compiler: gcc-7 (Debian 7.4.0-11) 7.4.0 reproduce: # save the attached .config to linux build tree make ARCH=i386 If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): In file included from :0:0: drivers/gpu/drm/i915/i915_perf_types.h:25:2: error: unknown type name 'i915_reg_t' i915_reg_t addr; ^~ drivers/gpu/drm/i915/i915_perf_types.h:32:12: error: 'UUID_STRING_LEN' undeclared here (not in a function); did you mean '_LINUX_STRING_H_'? char uuid[UUID_STRING_LEN + 1]; ^~~ _LINUX_STRING_H_ drivers/gpu/drm/i915/i915_perf_types.h:75:6: error: unknown type name 'poll_table'; did you mean 'poll_to_key'? poll_table *wait); ^~ poll_to_key drivers/gpu/drm/i915/i915_perf_types.h:128:2: error: unknown type name 'intel_wakeref_t' intel_wakeref_t wakeref; ^~~ >> drivers/gpu/drm/i915/i915_perf_types.h:188:29: error: field >> 'active_config_rq' has incomplete type struct i915_active_request active_config_rq; ^~~~ vim +/active_config_rq +188 drivers/gpu/drm/i915/i915_perf_types.h 50 51 /** 52 * struct i915_perf_stream_ops - the OPs to support a specific stream type 53 */ 54 struct i915_perf_stream_ops { 55 /** 56 * @enable: Enables the collection of HW samples, either in response to 57 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened 58 * without `I915_PERF_FLAG_DISABLED`. 59 */ 60 void (*enable)(struct i915_perf_stream *stream); 61 62 /** 63 * @disable: Disables the collection of HW samples, either in response 64 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying 65 * the stream. 66 */ 67 void (*disable)(struct i915_perf_stream *stream); 68 69 /** 70 * @poll_wait: Call poll_wait, passing a wait queue that will be woken 71 * once there is something ready to read() for the stream 72 */ 73 void (*poll_wait)(struct i915_perf_stream *stream, 74struct file *file, > 75poll_table *wait); 76 77 /** 78 * @wait_unlocked: For handling a blocking read, wait until there is 79 * something to ready to read() for the stream. E.g. wait on the same 80 * wait queue that would be passed to poll_wait(). 81 */ 82 int (*wait_unlocked)(struct i915_perf_stream *stream); 83 84 /** 85 * @read: Copy buffered metrics as records to userspace 86 * **buf**: the userspace, destination buffer 87 * **count**: the number of bytes to copy, requested by userspace 88 * **offset**: zero at the start of the read, updated as the read 89 * proceeds, it represents how many bytes have been copied so far and 90 * the buffer offset for copying the next record. 91 * 92 * Copy as many buffered i915 perf samples and records for this stream 93 * to userspace as will fit in the given buffer. 94 * 95 * Only write complete records; returning -%ENOSPC if there isn't room 96 * for a complete record. 97 * 98 * Return any error condition that results in a short read such as 99 * -%ENOSPC or -%EFAULT, even though these may be squashed before 100 * returning to userspace. 101 */ 102 int (*read)(struct i915_perf_stream *stream, 103 char __user *buf, 104 size_t count, 105 size_t *offset); 106 107 /** 108 * @destroy: Cleanup any stream specific resources. 109 * 110 * The stream will always be disabled before this is called. 111 */ 112 void (*destroy)(struct i915_perf_stream *stream); 113 }; 114 115 /** 116 * struct i915_perf_stream - state for a single open stream FD 117 */ 118 struct i915_perf_stream { 119
[Intel-gfx] [PATCH v15 10/13] drm/i915/perf: execute OA configuration from command stream
We haven't run into issues with programming the global OA/NOA registers configuration from CPU so far, but HW engineers actually recommend doing this from the command streamer. On TGL in particular one of the clock domain in which some of that programming goes might not be powered when we poke things from the CPU. Since we have a command buffer prepared for the execbuffer side of things, we can reuse that approach here too. This also allows us to significantly reduce the amount of time we hold the main lock. v2: Drop the global lock as much as possible v3: Take global lock to pin global v4: Create i915 request in emit_oa_config() to avoid deadlocks (Lionel) v5: Move locking to the stream (Lionel) v6: Move active reconfiguration request into i915_perf_stream (Lionel) v7: Pin VMA outside request creation (Chris) Lock VMA before move to active (Chris) v8: Fix double free on stream->initial_oa_config_bo (Lionel) Don't allow interruption when waiting on active config request (Lionel) Signed-off-by: Lionel Landwerlin --- drivers/gpu/drm/i915/i915_perf.c | 170 - drivers/gpu/drm/i915/i915_perf_types.h | 13 +- 2 files changed, 122 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index f2b778d84b52..8e3532518139 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -1558,18 +1558,23 @@ free_oa_configs(struct i915_perf_stream *stream) static void i915_oa_stream_destroy(struct i915_perf_stream *stream) { struct drm_i915_private *dev_priv = stream->dev_priv; + int err; BUG_ON(stream != dev_priv->perf.exclusive_stream); - /* -* Unset exclusive_stream first, it will be checked while disabling -* the metric set on gen8+. -*/ mutex_lock(&dev_priv->drm.struct_mutex); - dev_priv->perf.exclusive_stream = NULL; + mutex_lock(&stream->config_mutex); dev_priv->perf.ops.disable_metric_set(stream); + err = i915_active_request_retire(&stream->active_config_rq, 0, +&stream->config_mutex); + mutex_unlock(&stream->config_mutex); + dev_priv->perf.exclusive_stream = NULL; mutex_unlock(&dev_priv->drm.struct_mutex); + if (err) + DRM_ERROR("Failed to disable perf stream\n"); + + free_oa_buffer(stream); free_noa_wait(stream); @@ -1795,6 +1800,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return PTR_ERR(bo); } + ret = i915_mutex_lock_interruptible(&i915->drm); + if (ret) + goto err_unref; + /* * We pin in GGTT because we jump into this buffer now because * multiple OA config BOs will have a jump to this address and it @@ -1802,10 +1811,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) */ vma = i915_gem_object_ggtt_pin(bo, NULL, 0, 4096, 0); if (IS_ERR(vma)) { + mutex_unlock(&i915->drm.struct_mutex); ret = PTR_ERR(vma); goto err_unref; } + mutex_unlock(&i915->drm.struct_mutex); + batch = cs = i915_gem_object_pin_map(bo, I915_MAP_WB); if (IS_ERR(batch)) { ret = PTR_ERR(batch); @@ -1939,7 +1951,9 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return 0; err_unpin: - __i915_vma_unpin(vma); + mutex_lock(&i915->drm.struct_mutex); + i915_vma_unpin_and_release(&vma, 0); + mutex_unlock(&i915->drm.struct_mutex); err_unref: i915_gem_object_put(bo); @@ -1947,50 +1961,73 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) return ret; } -static void config_oa_regs(struct drm_i915_private *dev_priv, - const struct i915_oa_reg *regs, - u32 n_regs) +static int emit_oa_config(struct drm_i915_private *i915, + struct i915_perf_stream *stream) { - u32 i; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int err; - for (i = 0; i < n_regs; i++) { - const struct i915_oa_reg *reg = regs + i; + lockdep_assert_held(&stream->config_mutex); + + vma = i915_vma_instance(stream->initial_oa_config_bo, + &stream->engine->gt->ggtt->vm, NULL); + if (unlikely(IS_ERR(vma))) + return PTR_ERR(vma); - I915_WRITE(reg->addr, reg->value); + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_vma_unpin; + + rq = i915_request_create(stream->engine->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_add_request; } -} -static void delay_after_mux(void) -{ - /* -* It apparently takes a fairly long time for a