On 30/08/2019 18:32, Chris Wilson wrote:
Quoting Lionel Landwerlin (2019-08-30 15:47:24)
+static int
+get_execbuf_oa_config(struct i915_execbuffer *eb)
+{
+       int err = 0;
+
+       eb->perf_file = NULL;
+       eb->oa_config = NULL;
+       eb->oa_vma = NULL;
+       eb->oa_bo = NULL;
+
+       if ((eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) 
== 0)
+               return 0;
+
+       eb->perf_file = fget(eb->extensions.perf_config.perf_fd);
+       if (!eb->perf_file)
+               return -EINVAL;
+
+       err = i915_mutex_lock_interruptible(&eb->i915->drm);
+       if (err) {
+               fput(eb->perf_file);
+               eb->perf_file = NULL;
+               return err;
+       }
+
+       if (eb->perf_file->private_data != eb->i915->perf.exclusive_stream)
+               err = -EINVAL;
+
+       mutex_unlock(&eb->i915->drm.struct_mutex);
So why can i915->perf.execlusive_stream not change after this point?


It changes only when the last FD reference is dropped, so if we got it here there is at least one ref from the app doing the ioctl and we grab another ref on it which hold onto until we return.



+
+       if (!err) {
+               err = i915_perf_get_oa_config_and_bo(
+                       eb->i915->perf.exclusive_stream,
+                       eb->extensions.perf_config.oa_config,
+                       &eb->oa_config, &eb->oa_bo);
+       }
+
+       return err;
+}
+
  static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
                              struct i915_vma *vma,
                              unsigned int len)
@@ -2051,6 +2098,50 @@ add_to_client(struct i915_request *rq, struct drm_file 
*file)
         spin_unlock(&file_priv->mm.lock);
  }
+static int eb_oa_config(struct i915_execbuffer *eb)
+{
+       struct i915_perf_stream *perf_stream;
+       int err;
+
+       if (!eb->oa_config)
+               return 0;
+
+       perf_stream = eb->perf_file->private_data;
+
+       err = mutex_lock_interruptible(&perf_stream->config_mutex);
+       if (err)
+               return err;
+
+       err = i915_active_request_set(&perf_stream->active_config_rq,
+                                     eb->request);
+       if (err)
+               goto out;
+
+       /*
+        * If the config hasn't changed, skip reconfiguring the HW (this is
+        * subject to a delay we want to avoid has much as possible).
+        */
+       if (eb->oa_config == perf_stream->oa_config)
+               goto out;
+
We need to wait for any exclusive fences in case we migrate this object
in future:

i915_vma_lock(eb->oa_vma);
err = i915_request_await_object(eb->request, eb->oa_vma->obj, false); /* 
await_resv already! */
if (err = 0)
        err = i915_vma_move_to_active(eb->oa_vma, eb->request, 0);
i915_vma_unlock(eb->oa_vma);

Though this raises an interesting point, we do not actually want to emit
a semaphore here (albeit the engine is fixed atm) as we are after the
point where we have declared all semaphore waits completed. (Not an
issue to worry about right now!)


If we do that for this VMA, don't we have to do it for any?

If that's happening in a different function, I could put it there.



+       if (err)
+               goto out;
+
+       err = eb->engine->emit_bb_start(eb->request,
+                                       eb->oa_vma->node.start,
+                                       0, I915_DISPATCH_SECURE);
+       if (err)
+               goto out;
+
+       swap(eb->oa_config, perf_stream->oa_config);
+
+out:
+       mutex_unlock(&perf_stream->config_mutex);
+
+       return err;
+}
+
  static int eb_submit(struct i915_execbuffer *eb)
  {
         int err;
@@ -2077,6 +2168,10 @@ static int eb_submit(struct i915_execbuffer *eb)
                         return err;
         }
+ err = eb_oa_config(eb);
+       if (err)
+               return err;
Ok, definitely needs to be after the waits!

+
         err = eb->engine->emit_bb_start(eb->request,
                                         eb->batch->node.start +
                                         eb->batch_start_offset,
@@ -2643,8 +2738,25 @@ static int parse_timeline_fences(struct 
i915_user_extension __user *ext, void *d
         return 0;
  }
+static int parse_perf_config(struct i915_user_extension __user *ext, void *data)
+{
+       struct i915_execbuffer *eb = data;
+
+       if (eb->extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF))
+               return -EINVAL;
+
+       if (copy_from_user(&eb->extensions.perf_config, ext,
+                          sizeof(eb->extensions.perf_config)))
+               return -EFAULT;
+
+       eb->extensions.flags |= BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF);
+
+       return 0;
+}
+
  static const i915_user_extension_fn execbuf_extensions[] = {
          [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+        [DRM_I915_GEM_EXECBUFFER_EXT_PERF] = parse_perf_config,
  };
static int
@@ -2755,10 +2867,14 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                 }
         }
- err = eb_create(&eb);
+       err = get_execbuf_oa_config(&eb);
         if (err)
                 goto err_out_fence;
+ err = eb_create(&eb);
+       if (err)
+               goto err_oa_config;
+
         GEM_BUG_ON(!eb.lut_size);
err = eb_select_context(&eb);
@@ -2769,6 +2885,16 @@ i915_gem_do_execbuffer(struct drm_device *dev,
         if (unlikely(err))
                 goto err_context;
+ if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+               struct i915_perf_stream *perf_stream =
+                       eb.perf_file->private_data;
+
+               if (perf_stream->engine != eb.engine) {
+                       err = -EINVAL;
+                       goto err_engine;
+               }
+       }
+
         err = i915_mutex_lock_interruptible(dev);
         if (err)
                 goto err_engine;
@@ -2889,6 +3015,20 @@ i915_gem_do_execbuffer(struct drm_device *dev,
                 }
         }
+ if (eb.extensions.flags & BIT_ULL(DRM_I915_GEM_EXECBUFFER_EXT_PERF)) {
+               eb.oa_vma = i915_vma_instance(eb.oa_bo,
+                                             &eb.engine->i915->ggtt.vm, NULL);
eb.engine->gt->ggtt.vm


Thanks!



The i915_vma_instance() can be created outside of the struct_mutex, and
once we have the oa_vma, we don't need to keep a oa_bo pointer.

Later we can do the i915_vma_pin() before struct_mutex as well.

Thanks, that's looking a better with regard the plan to eliminate
struct_mutex.
-Chris


_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to