[Intel-gfx] [PATCH 3/3] drm/i915: peel dma-fence-chains wait fences
From: Lionel Landwerlin To allow faster engine to engine synchronization, peel the layer of dma-fence-chain to expose potential i915 fences so that the i915-request code can emit HW semaphore wait/signal operations in the ring which is faster than waking up the host to submit unblocked workloads after interrupt notification. Signed-off-by: Lionel Landwerlin --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 39 +-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 4cb4cd035daa..9b01f7c51b65 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2504,6 +2504,7 @@ await_fence_array(struct i915_execbuffer *eb, for (n = 0; n < nfences; n++) { struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; unsigned int flags; syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); @@ -2511,10 +2512,40 @@ await_fence_array(struct i915_execbuffer *eb, if (!fences[n].dma_fence) continue; - err = i915_request_await_dma_fence(eb->request, - fences[n].dma_fence); - if (err < 0) - return err; + /* +* If we're dealing with a dma-fence-chain, peel the chain by +* adding all of the unsignaled fences +* (dma_fence_chain_for_each does that for us) the chain +* points to. +* +* This enables us to identify waits on i915 fences and allows +* for faster engine-to-engine synchronization using HW +* semaphores. +*/ + chain = to_dma_fence_chain(fences[n].dma_fence); + if (chain) { + struct dma_fence *iter; + + dma_fence_chain_for_each(iter, fences[n].dma_fence) { + struct dma_fence_chain *iter_chain = + to_dma_fence_chain(iter); + + GEM_BUG_ON(!iter_chain); + + err = i915_request_await_dma_fence(eb->request, + iter_chain->fence); + if (err < 0) { + dma_fence_put(iter); + return err; + } + } + + } else { + err = i915_request_await_dma_fence(eb->request, + fences[n].dma_fence); + if (err < 0) + return err; + } } return 0; -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm/i915: introduce a mechanism to extend execbuf2
From: Lionel Landwerlin We're planning to use this for a couple of new feature where we need to provide additional parameters to execbuf. v2: Check for invalid flags in execbuffer2 (Lionel) v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson (v1) --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 39 ++- include/uapi/drm/i915_drm.h | 26 +++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9d11bad74e9a..16831f715daa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -26,6 +26,7 @@ #include "i915_gem_ioctls.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" +#include "i915_user_extensions.h" struct eb_vma { struct i915_vma *vma; @@ -288,6 +289,10 @@ struct i915_execbuffer { int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ struct eb_vma_array *array; + + struct { + u64 flags; /** Available extensions parameters */ + } extensions; }; static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) @@ -1698,7 +1703,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | +I915_EXEC_USE_EXTENSIONS))) { if (exec->num_cliprects || exec->cliprects_ptr) return -EINVAL; } @@ -2431,6 +2437,33 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static const i915_user_extension_fn execbuf_extensions[] = { +}; + +static int +parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) +{ + eb->extensions.flags = 0; + + if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) + return 0; + + /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot +* have another flag also using it at the same time. +*/ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (args->num_cliprects != 0) + return -EINVAL; + + return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), + execbuf_extensions, + ARRAY_SIZE(execbuf_extensions), + eb); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, @@ -2484,6 +2517,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; + err = parse_execbuf2_extensions(args, &eb); + if (err) + return err; + if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); if (!in_fence) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 14b67cd6b54b..7ea38aa6502c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +enum drm_i915_gem_execbuffer_ext { + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1066,15 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY -* is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a -* struct drm_i915_gem_exec_fence *fences. +* & I915_EXEC_USE_EXTENSIONS are not set. +* +* If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array +* of struct drm_i915_gem_exec_fence and num_cliprects is the length +* of the array. +* +* If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a +* single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is +* 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1192,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list
[Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Add wait on previous sync points in timelines (Sandeep) Signed-off-by: Lionel Landwerlin Signed-off-by: Venkata Sandeep Dhanalakota --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 312 ++ drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 38 +++ 4 files changed, 296 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 16831f715daa..4cb4cd035daa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -230,6 +230,13 @@ enum { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. */ +struct i915_eb_fences { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -292,6 +299,7 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; } extensions; }; @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static struct i915_eb_fences * +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = + &eb->extensions.timeline_fences; + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fences *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err = 0; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return ERR_PTR(-EFAULT); + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return ERR_PTR(-EFAULT); + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & +~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; + + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { + err = -EFAULT; + goto err; + } + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + +
Re: [Intel-gfx] [PATCH 2/3] drm/i915: add syncobj timeline support
On 20/04/08 07:29, Lionel Landwerlin wrote: > On 06/04/2020 23:07, Venkata Sandeep Dhanalakota wrote: > > Introduces a new parameters to execbuf so that we can specify syncobj > > handles as well as timeline points. > > > > v2: Reuse i915_user_extension_fn > > > > v3: Check that the chained extension is only present once (Chris) > > > > v4: Check that dma_fence_chain_find_seqno returns a non NULL fence > > (Lionel) > > > > v5: Use BIT_ULL (Chris) > > > > v6: Fix issue with already signaled timeline points, > > dma_fence_chain_find_seqno() setting fence to NULL (Chris) > > > > v7: Report ENOENT with invalid syncobj handle (Lionel) > > > > v8: Check for out of order timeline point insertion (Chris) > > > > v9: After explanations on > > > > https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html > > drop the ordering check from v8 (Lionel) > > > > v10: Set first extension enum item to 1 (Jason) > > > > v11: Add wait on previous sync points in timelines (Sandeep) > > > Thanks for picking this series up! > > > Could you point to the changes in v11? > > I haven't look at it in a while and I can't remember what you would have > changed. > Hi, Mainly the changes are in get_timeline_fence_array(), to enforce the implicit dependencies in signal fence-array. we want have efficient waits on the last point on timelines so that we signal at a correct point in time along the timeline the order is controlled so that we always wait on the previous request/sync point in the timeline and signal after the completion of the current request. Thank you, ~sandeep > > Thanks a lot, > > > -Lionel > > > > > > Signed-off-by: Lionel Landwerlin > > Signed-off-by: Venkata Sandeep Dhanalakota > intel.com> > > --- > > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 312 ++ > > drivers/gpu/drm/i915/i915_drv.c | 3 +- > > drivers/gpu/drm/i915/i915_getparam.c | 1 + > > include/uapi/drm/i915_drm.h | 38 +++ > > 4 files changed, 296 insertions(+), 58 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > index 16831f715daa..4cb4cd035daa 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > @@ -230,6 +230,13 @@ enum { > >* the batchbuffer in trusted mode, otherwise the ioctl is rejected. > >*/ > > +struct i915_eb_fences { > > + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ > > + struct dma_fence *dma_fence; > > + u64 value; > > + struct dma_fence_chain *chain_fence; > > +}; > > + > > struct i915_execbuffer { > > struct drm_i915_private *i915; /** i915 backpointer */ > > struct drm_file *file; /** per-file lookup tables and limits */ > > @@ -292,6 +299,7 @@ struct i915_execbuffer { > > struct { > > u64 flags; /** Available extensions parameters */ > > + struct drm_i915_gem_execbuffer_ext_timeline_fences > > timeline_fences; > > } extensions; > > }; > > @@ -2244,67 +2252,219 @@ eb_pin_engine(struct i915_execbuffer *eb, > > } > > static void > > -__free_fence_array(struct drm_syncobj **fences, unsigned int n) > > +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) > > { > > - while (n--) > > - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); > > + while (n--) { > > + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); > > + dma_fence_put(fences[n].dma_fence); > > + kfree(fences[n].chain_fence); > > + } > > kvfree(fences); > > } > > -static struct drm_syncobj ** > > -get_fence_array(struct drm_i915_gem_execbuffer2 *args, > > - struct drm_file *file) > > +static struct i915_eb_fences * > > +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) > > +{ > > + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = > > + &eb->extensions.timeline_fences; > > + struct drm_i915_gem_exec_fence __user *user_fences; > > + struct i915_eb_fences *fences; > > + u64 __user *user_values; > > + u64 num_fences, num_user_fences = timeline_fences->fence_count; > > + unsigned long n; > > + int err = 0; > > + > > + /* Check multiplication o
Re: [Intel-gfx] [PATCH 04/10] dma-buf: Report signaled links inside dma-fence-chain
On 20/04/03 10:12, Chris Wilson wrote: > Whenever we walk along the dma-fence-chain, we prune signaled links to > keep the chain nice and tidy. This leads to situations where we can > prune a link and report the earlier fence as the target seqno -- > violating our own consistency checks that the seqno is not more advanced > than the last element in a dma-fence-chain. > > Report a NULL fence and success if the seqno has already been signaled. > > Signed-off-by: Chris Wilson > --- > drivers/dma-buf/dma-fence-chain.c | 7 +++ > 1 file changed, 7 insertions(+) > > diff --git a/drivers/dma-buf/dma-fence-chain.c > b/drivers/dma-buf/dma-fence-chain.c > index 3d123502ff12..c435bbba851c 100644 > --- a/drivers/dma-buf/dma-fence-chain.c > +++ b/drivers/dma-buf/dma-fence-chain.c > @@ -99,6 +99,12 @@ int dma_fence_chain_find_seqno(struct dma_fence **pfence, > uint64_t seqno) > return -EINVAL; > > dma_fence_chain_for_each(*pfence, &chain->base) { > + if ((*pfence)->seqno < seqno) { /* already signaled */ > + dma_fence_put(*pfence); > + *pfence = NULL; > + break; > + } > + Looks good to me. Tested-by: Venkata Sandeep Dhanalakota Reviewed-by: Venkata Sandeep Dhanalakota > if ((*pfence)->context != chain->base.context || > to_dma_fence_chain(*pfence)->prev_seqno < seqno) > break; > @@ -222,6 +228,7 @@ EXPORT_SYMBOL(dma_fence_chain_ops); > * @chain: the chain node to initialize > * @prev: the previous fence > * @fence: the current fence > + * @seqno: the sequence number (syncpt) of the fence within the chain > * > * Initialize a new chain node and either start a new chain or add the node > to > * the existing chain of the previous fence. > -- > 2.20.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 05/10] dma-buf: Exercise dma-fence-chain under selftests
;, i); > + err = -EINVAL; > + goto err; > + } > + } > + > +err: > + fence_chains_fini(&fc); > + return err; > +} > + > +static int __wait_fence_chains(void *arg) > +{ > + struct fence_chains *fc = arg; > + > + if (dma_fence_wait(fc->tail, false)) > + return -EIO; > + > + return 0; > +} > + > +static int wait_forward(void *arg) > +{ > + struct fence_chains fc; > + struct task_struct *tsk; > + int err; > + int i; > + > + err = fence_chains_init(&fc, 64 << 10, seqno_inc); > + if (err) > + return err; > + > + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); > + if (IS_ERR(tsk)) { > + err = PTR_ERR(tsk); > + goto err; > + } > + get_task_struct(tsk); > + yield_to(tsk, true); > + > + for (i = 0; i < fc.chain_length; i++) > + dma_fence_signal(fc.fences[i]); > + > + err = kthread_stop(tsk); > + put_task_struct(tsk); > + > +err: > + fence_chains_fini(&fc); > + return err; > +} > + > +static int wait_backward(void *arg) > +{ > + struct fence_chains fc; > + struct task_struct *tsk; > + int err; > + int i; > + > + err = fence_chains_init(&fc, 64 << 10, seqno_inc); > + if (err) > + return err; > + > + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); > + if (IS_ERR(tsk)) { > + err = PTR_ERR(tsk); > + goto err; > + } > + get_task_struct(tsk); > + yield_to(tsk, true); > + > + for (i = fc.chain_length; i--; ) > + dma_fence_signal(fc.fences[i]); > + > + err = kthread_stop(tsk); > + put_task_struct(tsk); > + > +err: > + fence_chains_fini(&fc); > + return err; > +} > + > +static void randomise_fences(struct fence_chains *fc) > +{ > + unsigned int count = fc->chain_length; > + > + /* Fisher-Yates shuffle courtesy of Knuth */ > + while (--count) { > + unsigned int swp; > + > + swp = prandom_u32_max(count + 1); > + if (swp == count) > + continue; > + > + swap(fc->fences[count], fc->fences[swp]); > + } > +} > + > +static int wait_random(void *arg) > +{ > + struct fence_chains fc; > + struct task_struct *tsk; > + int err; > + int i; > + > + err = fence_chains_init(&fc, 64 << 10, seqno_inc); > + if (err) > + return err; > + > + randomise_fences(&fc); > + > + tsk = kthread_run(__wait_fence_chains, &fc, "dmabuf/wait"); > + if (IS_ERR(tsk)) { > + err = PTR_ERR(tsk); > + goto err; > + } > + get_task_struct(tsk); > + yield_to(tsk, true); > + > + for (i = 0; i < fc.chain_length; i++) > + dma_fence_signal(fc.fences[i]); > + > + err = kthread_stop(tsk); > + put_task_struct(tsk); > + > +err: > + fence_chains_fini(&fc); > + return err; > +} > + > +int dma_fence_chain(void) > +{ > + static const struct subtest tests[] = { > + SUBTEST(sanitycheck), > + SUBTEST(find_seqno), > + SUBTEST(find_signaled), > + SUBTEST(find_out_of_order), > + SUBTEST(find_gap), > + SUBTEST(find_race), > + SUBTEST(signal_forward), > + SUBTEST(signal_backward), > + SUBTEST(wait_forward), > + SUBTEST(wait_backward), > + SUBTEST(wait_random), > + }; > + int ret; > + > + pr_info("sizeof(dma_fence_chain)=%zu\n", > + sizeof(struct dma_fence_chain)); > + > + slab_fences = KMEM_CACHE(mock_fence, > + SLAB_TYPESAFE_BY_RCU | > + SLAB_HWCACHE_ALIGN); > + if (!slab_fences) > + return -ENOMEM; > + > + ret = subtests(tests, NULL); > + > + kmem_cache_destroy(slab_fences); > + return ret; > +} I think it covers all api required for fence-chain. Reviewed-by: Venkata Sandeep Dhanalakota > -- > 2.20.1 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/4] drm/i915: add syncobj timeline support
Introduces a new parameters to execbuf so that we can specify syncobj handles as well as timeline points. v2: Reuse i915_user_extension_fn v3: Check that the chained extension is only present once (Chris) v4: Check that dma_fence_chain_find_seqno returns a non NULL fence (Lionel) v5: Use BIT_ULL (Chris) v6: Fix issue with already signaled timeline points, dma_fence_chain_find_seqno() setting fence to NULL (Chris) v7: Report ENOENT with invalid syncobj handle (Lionel) v8: Check for out of order timeline point insertion (Chris) v9: After explanations on https://lists.freedesktop.org/archives/dri-devel/2019-August/229287.html drop the ordering check from v8 (Lionel) v10: Set first extension enum item to 1 (Jason) v11: Add wait on previous sync points in timelines (Sandeep) Signed-off-by: Lionel Landwerlin Signed-off-by: Venkata Sandeep Dhanalakota --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 316 ++ drivers/gpu/drm/i915/i915_drv.c | 3 +- drivers/gpu/drm/i915/i915_getparam.c | 1 + include/uapi/drm/i915_drm.h | 38 +++ 4 files changed, 300 insertions(+), 58 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 16831f715daa..8dd651cdca39 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -230,6 +230,13 @@ enum { * the batchbuffer in trusted mode, otherwise the ioctl is rejected. */ +struct i915_eb_fences { + struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */ + struct dma_fence *dma_fence; + u64 value; + struct dma_fence_chain *chain_fence; +}; + struct i915_execbuffer { struct drm_i915_private *i915; /** i915 backpointer */ struct drm_file *file; /** per-file lookup tables and limits */ @@ -292,6 +299,7 @@ struct i915_execbuffer { struct { u64 flags; /** Available extensions parameters */ + struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; } extensions; }; @@ -2244,67 +2252,223 @@ eb_pin_engine(struct i915_execbuffer *eb, } static void -__free_fence_array(struct drm_syncobj **fences, unsigned int n) +__free_fence_array(struct i915_eb_fences *fences, unsigned int n) { - while (n--) - drm_syncobj_put(ptr_mask_bits(fences[n], 2)); + while (n--) { + drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2)); + dma_fence_put(fences[n].dma_fence); + kfree(fences[n].chain_fence); + } kvfree(fences); } -static struct drm_syncobj ** -get_fence_array(struct drm_i915_gem_execbuffer2 *args, - struct drm_file *file) +static struct i915_eb_fences * +get_timeline_fence_array(struct i915_execbuffer *eb, int *out_n_fences) +{ + struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences = + &eb->extensions.timeline_fences; + struct drm_i915_gem_exec_fence __user *user_fences; + struct i915_eb_fences *fences; + u64 __user *user_values; + u64 num_fences, num_user_fences = timeline_fences->fence_count; + unsigned long n; + int err = 0; + + /* Check multiplication overflow for access_ok() and kvmalloc_array() */ + BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long)); + if (num_user_fences > min_t(unsigned long, + ULONG_MAX / sizeof(*user_fences), + SIZE_MAX / sizeof(*fences))) + return ERR_PTR(-EINVAL); + + user_fences = u64_to_user_ptr(timeline_fences->handles_ptr); + if (!access_ok(user_fences, num_user_fences * sizeof(*user_fences))) + return ERR_PTR(-EFAULT); + + user_values = u64_to_user_ptr(timeline_fences->values_ptr); + if (!access_ok(user_values, num_user_fences * sizeof(*user_values))) + return ERR_PTR(-EFAULT); + + fences = kvmalloc_array(num_user_fences, sizeof(*fences), + __GFP_NOWARN | GFP_KERNEL); + if (!fences) + return ERR_PTR(-ENOMEM); + + BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) & +~__I915_EXEC_FENCE_UNKNOWN_FLAGS); + + for (n = 0, num_fences = 0; n < timeline_fences->fence_count; n++) { + struct drm_i915_gem_exec_fence user_fence; + struct drm_syncobj *syncobj; + struct dma_fence *fence = NULL; + u64 point; + + if (__copy_from_user(&user_fence, user_fences++, sizeof(user_fence))) { + err = -EFAULT; + goto err; + } + + if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS) { + err = -EINVAL; + goto err; + } + +
[Intel-gfx] [PATCH 1/4] drm/i915: introduce a mechanism to extend execbuf2
From: Lionel Landwerlin We're planning to use this for a couple of new feature where we need to provide additional parameters to execbuf. v2: Check for invalid flags in execbuffer2 (Lionel) v3: Rename I915_EXEC_EXT -> I915_EXEC_USE_EXTENSIONS (Chris) Signed-off-by: Lionel Landwerlin Reviewed-by: Chris Wilson (v1) --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 39 ++- include/uapi/drm/i915_drm.h | 26 +++-- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 9d11bad74e9a..16831f715daa 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -26,6 +26,7 @@ #include "i915_gem_ioctls.h" #include "i915_sw_fence_work.h" #include "i915_trace.h" +#include "i915_user_extensions.h" struct eb_vma { struct i915_vma *vma; @@ -288,6 +289,10 @@ struct i915_execbuffer { int lut_size; struct hlist_head *buckets; /** ht for relocation handles */ struct eb_vma_array *array; + + struct { + u64 flags; /** Available extensions parameters */ + } extensions; }; static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) @@ -1698,7 +1703,8 @@ static int i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec) return -EINVAL; /* Kernel clipping was a DRI1 misfeature */ - if (!(exec->flags & I915_EXEC_FENCE_ARRAY)) { + if (!(exec->flags & (I915_EXEC_FENCE_ARRAY | +I915_EXEC_USE_EXTENSIONS))) { if (exec->num_cliprects || exec->cliprects_ptr) return -EINVAL; } @@ -2431,6 +2437,33 @@ static void eb_request_add(struct i915_execbuffer *eb) mutex_unlock(&tl->mutex); } +static const i915_user_extension_fn execbuf_extensions[] = { +}; + +static int +parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args, + struct i915_execbuffer *eb) +{ + eb->extensions.flags = 0; + + if (!(args->flags & I915_EXEC_USE_EXTENSIONS)) + return 0; + + /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot +* have another flag also using it at the same time. +*/ + if (eb->args->flags & I915_EXEC_FENCE_ARRAY) + return -EINVAL; + + if (args->num_cliprects != 0) + return -EINVAL; + + return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr), + execbuf_extensions, + ARRAY_SIZE(execbuf_extensions), + eb); +} + static int i915_gem_do_execbuffer(struct drm_device *dev, struct drm_file *file, @@ -2484,6 +2517,10 @@ i915_gem_do_execbuffer(struct drm_device *dev, if (args->flags & I915_EXEC_IS_PINNED) eb.batch_flags |= I915_DISPATCH_PINNED; + err = parse_execbuf2_extensions(args, &eb); + if (err) + return err; + if (args->flags & I915_EXEC_FENCE_IN) { in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2)); if (!in_fence) diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 14b67cd6b54b..7ea38aa6502c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -1046,6 +1046,10 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +enum drm_i915_gem_execbuffer_ext { + DRM_I915_GEM_EXECBUFFER_EXT_MAX /* non-ABI */ +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1066,15 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY -* is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a -* struct drm_i915_gem_exec_fence *fences. +* & I915_EXEC_USE_EXTENSIONS are not set. +* +* If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array +* of struct drm_i915_gem_exec_fence and num_cliprects is the length +* of the array. +* +* If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a +* single struct drm_i915_gem_base_execbuffer_ext and num_cliprects is +* 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1192,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list
[Intel-gfx] [PATCH 4/4] drm/selftests: selftest for timeline semaphore
simple tests using drm api for timeline semaphore. Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/selftests/Makefile| 2 +- .../drm/selftests/drm_timeline_selftests.h| 16 + .../selftests/test-drm_timeline_semaphore.c | 545 ++ 3 files changed, 562 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/selftests/drm_timeline_selftests.h create mode 100644 drivers/gpu/drm/selftests/test-drm_timeline_semaphore.c diff --git a/drivers/gpu/drm/selftests/Makefile b/drivers/gpu/drm/selftests/Makefile index 0856e4b12f70..5bceef7c9d02 100644 --- a/drivers/gpu/drm/selftests/Makefile +++ b/drivers/gpu/drm/selftests/Makefile @@ -4,4 +4,4 @@ test-drm_modeset-y := test-drm_modeset_common.o test-drm_plane_helper.o \ test-drm_damage_helper.o test-drm_dp_mst_helper.o \ test-drm_rect.o -obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm_modeset.o test-drm_cmdline_parser.o +obj-$(CONFIG_DRM_DEBUG_SELFTEST) += test-drm_mm.o test-drm_modeset.o test-drm_cmdline_parser.o test-drm_timeline_semaphore.o diff --git a/drivers/gpu/drm/selftests/drm_timeline_selftests.h b/drivers/gpu/drm/selftests/drm_timeline_selftests.h new file mode 100644 index ..8922a1eed525 --- /dev/null +++ b/drivers/gpu/drm/selftests/drm_timeline_selftests.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* List each unit test as selftest(name, function) + * + * The name is used as both an enum and expanded as igt__name to create + * a module parameter. It must be unique and legal for a C identifier. + * + * Tests are executed in order by igt/drm_timeline_selftests + */ +selftest(sanitycheck, igt_sanitycheck) /* keep first (selfcheck for igt) */ +selftest(chainbasic, igt_chainbasic) +selftest(waitchain, igt_waitchain) +selftest(signalseqno, igt_signalseqno) +selftest(waitseqno, igt_waitseqno) +selftest(addunorder, igt_addunorder) +selftest(findseqno, igt_findseqno) +selftest(igt_forward, igt_forward) diff --git a/drivers/gpu/drm/selftests/test-drm_timeline_semaphore.c b/drivers/gpu/drm/selftests/test-drm_timeline_semaphore.c new file mode 100644 index ..8a964d302e42 --- /dev/null +++ b/drivers/gpu/drm/selftests/test-drm_timeline_semaphore.c @@ -0,0 +1,545 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test cases for the timeline semaphore + */ + +#define pr_fmt(fmt) "drm_tl: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "../lib/drm_random.h" + +#define TESTS "drm_timeline_selftests.h" +#include "drm_selftest.h" + +#define MAX_TIMELINES 64 +#define MAX_THREADS (2 * MAX_TIMELINES) + +static struct kmem_cache *slab_timeline; +static struct kmem_cache *slab_fence_chain; + +struct mock_timeline { + struct drm_syncobj *syncobj; + + /* cb when base is signalled */ + struct dma_fence_cb cb; + struct dma_fence base; + /* lock for dma_fence */ + spinlock_t lock; + u64 point; + u32 flags; +}; + +struct fence_chain { + struct dma_fence_chain chain; + struct dma_fence fence; + /* cb when fence is signalled */ + struct dma_fence_cb cb; + atomic_t signalers; + spinlock_t lock; +}; + +struct chain_info { + struct fence_chain **chains; + int nchains; +}; + +static const char *mock_name(struct dma_fence *s) +{ + return "timeline"; +} + +static void mock_release(struct dma_fence *fence) +{ + pr_debug("release %lld\n",fence->seqno); +} + +static const struct dma_fence_ops mock_ops = { + .get_driver_name = mock_name, + .get_timeline_name = mock_name, + .release = mock_release, +}; + +static void fence_callback(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct fence_chain *t = + container_of(cb, struct fence_chain, cb); + + if (atomic_dec_and_test(&t->signalers)) + dma_fence_signal(&t->fence); +} + +static void timeline_callback(struct dma_fence *f, struct dma_fence_cb *cb) +{ + struct mock_timeline *t = + container_of(cb, struct mock_timeline, cb); + dma_fence_signal(&t->base); +} + +static struct mock_timeline *timeline(u64 point, u32 flags) +{ + struct mock_timeline *t; + + t = kmem_cache_alloc(slab_timeline, GFP_KERNEL | __GFP_ZERO); + if (!t) + return NULL; + + spin_lock_init(&t->lock); + dma_fence_init(&t->base, &mock_ops, &t->lock, 0, point); + drm_syncobj_create(&t->syncobj, flags, dma_fence_get(&t->base)); + t->point = point; + + return t; +} + +static struct fence_chain* fence_chain(struct dma_fence *prev, + u64 seqno) +{ + struct fence_chai
[Intel-gfx] [PATCH 3/4] drm/i915: peel dma-fence-chains wait fences
From: Lionel Landwerlin To allow faster engine to engine synchronization, peel the layer of dma-fence-chain to expose potential i915 fences so that the i915-request code can emit HW semaphore wait/signal operations in the ring which is faster than waking up the host to submit unblocked workloads after interrupt notification. Signed-off-by: Lionel Landwerlin --- .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 39 +-- 1 file changed, 35 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index 8dd651cdca39..e43b76d7e9fd 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -2508,6 +2508,7 @@ await_fence_array(struct i915_execbuffer *eb, for (n = 0; n < nfences; n++) { struct drm_syncobj *syncobj; + struct dma_fence_chain *chain; unsigned int flags; syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); @@ -2515,10 +2516,40 @@ await_fence_array(struct i915_execbuffer *eb, if (!fences[n].dma_fence) continue; - err = i915_request_await_dma_fence(eb->request, - fences[n].dma_fence); - if (err < 0) - return err; + /* +* If we're dealing with a dma-fence-chain, peel the chain by +* adding all of the unsignaled fences +* (dma_fence_chain_for_each does that for us) the chain +* points to. +* +* This enables us to identify waits on i915 fences and allows +* for faster engine-to-engine synchronization using HW +* semaphores. +*/ + chain = to_dma_fence_chain(fences[n].dma_fence); + if (chain) { + struct dma_fence *iter; + + dma_fence_chain_for_each(iter, fences[n].dma_fence) { + struct dma_fence_chain *iter_chain = + to_dma_fence_chain(iter); + + GEM_BUG_ON(!iter_chain); + + err = i915_request_await_dma_fence(eb->request, + iter_chain->fence); + if (err < 0) { + dma_fence_put(iter); + return err; + } + } + + } else { + err = i915_request_await_dma_fence(eb->request, + fences[n].dma_fence); + if (err < 0) + return err; + } } return 0; -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 3/4] drm/i915: peel dma-fence-chains wait fences
On 20/04/11 11:50, Lionel Landwerlin wrote: > On 10/04/2020 19:51, Venkata Sandeep Dhanalakota wrote: > > From: Lionel Landwerlin > > > > To allow faster engine to engine synchronization, peel the layer of > > dma-fence-chain to expose potential i915 fences so that the > > i915-request code can emit HW semaphore wait/signal operations in the > > ring which is faster than waking up the host to submit unblocked > > workloads after interrupt notification. > > > > Signed-off-by: Lionel Landwerlin > > --- > > .../gpu/drm/i915/gem/i915_gem_execbuffer.c| 39 +-- > > 1 file changed, 35 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > index 8dd651cdca39..e43b76d7e9fd 100644 > > --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c > > @@ -2508,6 +2508,7 @@ await_fence_array(struct i915_execbuffer *eb, > > for (n = 0; n < nfences; n++) { > > struct drm_syncobj *syncobj; > > + struct dma_fence_chain *chain; > > unsigned int flags; > > syncobj = ptr_unpack_bits(fences[n].syncobj, &flags, 2); > > @@ -2515,10 +2516,40 @@ await_fence_array(struct i915_execbuffer *eb, > > if (!fences[n].dma_fence) > > continue; > > - err = i915_request_await_dma_fence(eb->request, > > - fences[n].dma_fence); > > - if (err < 0) > > - return err; > > + /* > > +* If we're dealing with a dma-fence-chain, peel the chain by > > +* adding all of the unsignaled fences > > +* (dma_fence_chain_for_each does that for us) the chain > > +* points to. > > +* > > +* This enables us to identify waits on i915 fences and allows > > +* for faster engine-to-engine synchronization using HW > > +* semaphores. > > +*/ > > + chain = to_dma_fence_chain(fences[n].dma_fence); > > + if (chain) { > > + struct dma_fence *iter; > > + > > + dma_fence_chain_for_each(iter, fences[n].dma_fence) { > > > The kbuild bot made me think of an interesting case. > > It is possible to build a chain where the first element isn't a > dma_fence_chain. > Yes agreed, we could have a valid fence-chain with first element as normal dma_fence and so iter_chain can be null. Will address this in next revision of the patch. > > We should handle this here like this : > > > if (iter_chain) > > err = i915_request_await_dma_fence(eb->request, iter_chain->fence); > > else > > err = i915_request_await_dma_fence(eb->request, iter); > > if (err < 0) { > > dma_fence_put(iter); > > return err; > > } > > > > + struct dma_fence_chain *iter_chain = > > + to_dma_fence_chain(iter); > > + > > + GEM_BUG_ON(!iter_chain); > > + > > + err = i915_request_await_dma_fence(eb->request, > > + > > iter_chain->fence); > > + if (err < 0) { > > + dma_fence_put(iter); > > + return err; > > + } > > + } > > + > > + } else { > > + err = i915_request_await_dma_fence(eb->request, > > + fences[n].dma_fence); > > + if (err < 0) > > + return err; > > + } > > } > > return 0; > > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915/gt: Check we are the Ironlake IPS provider before deregistering
On 19/12/10 03:36, Chris Wilson wrote: > Check that we own the global pointer before deregistering. > > Reported-by: Venkata Sandeep Dhanalakota > Signed-off-by: Chris Wilson > Cc: Venkata Sandeep Dhanalakota > Cc: Andi Shyti > --- > drivers/gpu/drm/i915/gt/intel_rps.c | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c > b/drivers/gpu/drm/i915/gt/intel_rps.c > index 08a38a3b90b0..fd01e4100fc1 100644 > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > @@ -1715,6 +1715,7 @@ void intel_rps_driver_register(struct intel_rps *rps) >* set up, to avoid intel-ips sneaking in and reading bogus values. >*/ > if (IS_GEN(gt->i915, 5)) { > + GEM_BUG_ON(ips_mchdev); > rcu_assign_pointer(ips_mchdev, gt->i915); > ips_ping_for_i915_load(); > } > @@ -1722,7 +1723,8 @@ void intel_rps_driver_register(struct intel_rps *rps) > > void intel_rps_driver_unregister(struct intel_rps *rps) > { > - rcu_assign_pointer(ips_mchdev, NULL); > + if (ips_mchdev == rps_to_i915(rps)) > + rcu_assign_pointer(ips_mchdev, NULL); > } I think its right thing to do, looks good to me. Reviewed-by: Venkata Sandeep Dhanalakota > > static struct drm_i915_private *mchdev_get(void) > -- > 2.24.0 > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Use the i915_device name for identifying our request fences
On 19/12/11 03:02, Chris Wilson wrote: > Use the dev_name(i915) to identify the requests for debugging, so we can > tell different device timelines apart. > > Signed-off-by: Chris Wilson > Cc: Venkata Sandeep Dhanalakota > --- > drivers/gpu/drm/i915/i915_request.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/i915_request.c > b/drivers/gpu/drm/i915/i915_request.c > index a6238c626a16..9646e02edea3 100644 > --- a/drivers/gpu/drm/i915/i915_request.c > +++ b/drivers/gpu/drm/i915/i915_request.c > @@ -57,7 +57,7 @@ static struct i915_global_request { > > static const char *i915_fence_get_driver_name(struct dma_fence *fence) > { > - return "i915"; > + return dev_name(to_request(fence)->i915->drm.dev); > } > Sure, should we also update i915_fence_get_timeline_name() return to_request(fence)->gem_context->name ?: i915_fence_get_driver_name(fence); > static const char *i915_fence_get_timeline_name(struct dma_fence *fence) > -- > 2.24.0 > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
We do not require to register the sysctl paths per instance, so making registration global. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_perf.c | 10 -- drivers/gpu/drm/i915/i915_perf_types.h | 1 - 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8d2e37949f46..426d04214a5d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -387,6 +387,8 @@ struct i915_oa_config_bo { struct i915_vma *vma; }; +static struct ctl_table_header *sysctl_header; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); void i915_oa_config_release(struct kref *ref) @@ -4345,7 +4347,8 @@ void i915_perf_init(struct drm_i915_private *i915) oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - perf->sysctl_header = register_sysctl_table(dev_root); + if (!sysctl_header) + sysctl_header = register_sysctl_table(dev_root); mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); @@ -4395,7 +4398,10 @@ void i915_perf_fini(struct drm_i915_private *i915) idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(perf->sysctl_header); + if (sysctl_header) { + unregister_sysctl_table(sysctl_header); + sysctl_header = NULL; + } memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..45e581455f5d 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -380,7 +380,6 @@ struct i915_perf { struct drm_i915_private *i915; struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; /* * Lock associated with adding/modifying/removing OA configs -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Make warned variable private
Make each instance to report the hang only once. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gpu_error.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ce130e1f1e47..8e35f92f914e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1284,6 +1284,8 @@ struct drm_i915_private { /* Mutex to protect the above hdcp component related values. */ struct mutex hdcp_comp_mutex; + bool warned; + I915_SELFTEST_DECLARE(struct i915_selftest_stash selftest;) /* diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8374d50c0770..ea282d9a9a3a 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1785,7 +1785,6 @@ void i915_capture_error_state(struct drm_i915_private *i915, intel_engine_mask_t engine_mask, const char *msg) { - static bool warned; struct i915_gpu_state *error; unsigned long flags; @@ -1815,7 +1814,7 @@ void i915_capture_error_state(struct drm_i915_private *i915, return; } - if (!xchg(&warned, true) && + if (!xchg(&i915->warned, true) && ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) { pr_info("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n"); pr_info("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n"); -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH] drm/i915: Use the i915_device name for identifying our request fences
On 19/12/11 04:07, Chris Wilson wrote: > Quoting Venkata Sandeep Dhanalakota (2019-12-11 15:59:09) > > On 19/12/11 03:02, Chris Wilson wrote: > > > Use the dev_name(i915) to identify the requests for debugging, so we can > > > tell different device timelines apart. > > > > > > Signed-off-by: Chris Wilson > > > Cc: Venkata Sandeep Dhanalakota > > > --- > > > drivers/gpu/drm/i915/i915_request.c | 2 +- > > > 1 file changed, 1 insertion(+), 1 deletion(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_request.c > > > b/drivers/gpu/drm/i915/i915_request.c > > > index a6238c626a16..9646e02edea3 100644 > > > --- a/drivers/gpu/drm/i915/i915_request.c > > > +++ b/drivers/gpu/drm/i915/i915_request.c > > > @@ -57,7 +57,7 @@ static struct i915_global_request { > > > > > > static const char *i915_fence_get_driver_name(struct dma_fence *fence) > > > { > > > - return "i915"; > > > + return dev_name(to_request(fence)->i915->drm.dev); > > > } > > > > > Sure, should we also update i915_fence_get_timeline_name() > > return to_request(fence)->gem_context->name ?: > > i915_fence_get_driver_name(fence); > > No need really. It's either a user context or a kernel context, the less > said to userspace about internals the better. It will be presented as > > 00:00:02.00 i915::[i915] (or something like that) > > If you would rather that "[i915]" be "[k]" or probably better yet "[" > DRIVER_NAME "]" got it, having "[i915]" makes sense. Reviewed-by: Venkata Sandeep Dhanalakota > -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
On 19/12/11 04:39, Tvrtko Ursulin wrote: > > On 11/12/2019 16:31, Tvrtko Ursulin wrote: > > On 11/12/2019 16:07, Venkata Sandeep Dhanalakota wrote: > > > We do not require to register the sysctl paths per instance, > > > so making registration global. > > > > > > Cc: Sudeep Dutt > > > Cc: Rodrigo Vivi > > > Cc: Daniel Vetter > > > Cc: Chris Wilson > > > Cc: Jani Nikula > > > Signed-off-by: Venkata Sandeep Dhanalakota > > > > > > --- > > > drivers/gpu/drm/i915/i915_perf.c | 10 -- > > > drivers/gpu/drm/i915/i915_perf_types.h | 1 - > > > 2 files changed, 8 insertions(+), 3 deletions(-) > > > > > > diff --git a/drivers/gpu/drm/i915/i915_perf.c > > > b/drivers/gpu/drm/i915/i915_perf.c > > > index 8d2e37949f46..426d04214a5d 100644 > > > --- a/drivers/gpu/drm/i915/i915_perf.c > > > +++ b/drivers/gpu/drm/i915/i915_perf.c > > > @@ -387,6 +387,8 @@ struct i915_oa_config_bo { > > > struct i915_vma *vma; > > > }; > > > +static struct ctl_table_header *sysctl_header; > > > + > > > static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer > > > *hrtimer); > > > void i915_oa_config_release(struct kref *ref) > > > @@ -4345,7 +4347,8 @@ void i915_perf_init(struct drm_i915_private *i915) > > > oa_sample_rate_hard_limit = 1000 * > > > (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); > > > - perf->sysctl_header = register_sysctl_table(dev_root); > > > + if (!sysctl_header) > > > + sysctl_header = register_sysctl_table(dev_root); > > > mutex_init(&perf->metrics_lock); > > > idr_init(&perf->metrics_idr); > > > @@ -4395,7 +4398,10 @@ void i915_perf_fini(struct drm_i915_private *i915) > > > idr_for_each(&perf->metrics_idr, destroy_config, perf); > > > idr_destroy(&perf->metrics_idr); > > > - unregister_sysctl_table(perf->sysctl_header); > > > + if (sysctl_header) { > > > + unregister_sysctl_table(sysctl_header); > > > + sysctl_header = NULL; > > > + } > > > > I am not sure if this could be racy with manual unbind from sysfs. Does > > PCI core serialize for us? > > Actually with two devices you also need to reference count it since you > don't want removal of the first device to remove the node but last. > Apparently this is not called during module exit, using krefs is way go to or have some helper which are called during module init/exit. void i915_perf_sysctl_register() { sysctl_header = register_sysctl_table(dev_root); } void i915_perf_sysctl_unregister() { unregister_sysctl_table(sysctl_header); } > > Regards, > > > > Tvrtko > > > > > memset(&perf->ops, 0, sizeof(perf->ops)); > > > perf->i915 = NULL; > > > diff --git a/drivers/gpu/drm/i915/i915_perf_types.h > > > b/drivers/gpu/drm/i915/i915_perf_types.h > > > index 74ddc20a0d37..45e581455f5d 100644 > > > --- a/drivers/gpu/drm/i915/i915_perf_types.h > > > +++ b/drivers/gpu/drm/i915/i915_perf_types.h > > > @@ -380,7 +380,6 @@ struct i915_perf { > > > struct drm_i915_private *i915; > > > struct kobject *metrics_kobj; > > > - struct ctl_table_header *sysctl_header; > > > /* > > > * Lock associated with adding/modifying/removing OA configs > > > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
We do not require to register the sysctl paths per instance, so making registration global. v2: make sysctl path register and unregister function driver specific (Tvrtko and Lucas). Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_pci.c| 6 ++ drivers/gpu/drm/i915/i915_perf.c | 19 --- drivers/gpu/drm/i915/i915_perf.h | 2 ++ drivers/gpu/drm/i915/i915_perf_types.h | 1 - 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index bba6b50e6beb..c5a2bb5e87fe 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -30,6 +30,7 @@ #include "display/intel_fbdev.h" #include "i915_drv.h" +#include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1051,6 +1052,10 @@ static int __init i915_init(void) return 0; } + err = i915_perf_sysctl_register(); + if (err) + return err; + return pci_register_driver(&i915_pci_driver); } @@ -1059,6 +1064,7 @@ static void __exit i915_exit(void) if (!i915_pci_driver.driver.owner) return; + i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); i915_globals_exit(); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8d2e37949f46..f039beed1771 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -387,6 +387,8 @@ struct i915_oa_config_bo { struct i915_vma *vma; }; +static struct ctl_table_header *sysctl_header; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); void i915_oa_config_release(struct kref *ref) @@ -4345,7 +4347,6 @@ void i915_perf_init(struct drm_i915_private *i915) oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - perf->sysctl_header = register_sysctl_table(dev_root); mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); @@ -4381,6 +4382,20 @@ static int destroy_config(int id, void *p, void *data) return 0; } +int i915_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl_table(dev_root); + if (!sysctl_header) + return -ENOMEM; + + return 0; +} + +void i915_perf_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @i915: i915 device instance @@ -4395,8 +4410,6 @@ void i915_perf_fini(struct drm_i915_private *i915) idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(perf->sysctl_header); - memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; } diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 4ceebce72060..1d1329e5af3a 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,6 +23,8 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); +int i915_perf_sysctl_register(void); +void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..45e581455f5d 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -380,7 +380,6 @@ struct i915_perf { struct drm_i915_private *i915; struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; /* * Lock associated with adding/modifying/removing OA configs -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Tag GEM_TRACE with device name
Adding device name to trace makes debugging easier, when dealing with multiple gpus. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/gem/i915_gem_pm.c| 4 +- drivers/gpu/drm/i915/gt/intel_context.c | 11 ++-- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +-- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 6 +-- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 12 ++--- drivers/gpu/drm/i915/gt/intel_lrc.c | 50 --- drivers/gpu/drm/i915/gt/intel_reset.c | 23 + .../gpu/drm/i915/gt/intel_ring_submission.c | 11 ++-- drivers/gpu/drm/i915/gt/selftest_hangcheck.c | 6 ++- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +-- drivers/gpu/drm/i915/i915_request.c | 15 -- 11 files changed, 92 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..037f2eb0b77b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,7 +13,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("[%s]\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); @@ -99,7 +99,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("[%s]\n", dev_name(i915->drm.dev)); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 61c39e943f69..5b3f05b29365 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -68,8 +68,9 @@ int __intel_context_do_pin(struct intel_context *ce) if (err) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, + GEM_TRACE("[%s] %s context:%llx pin ring:{head:%04x, tail:%04x}\n", + dev_name(ce->engine->i915->drm.dev), ce->engine->name, + ce->timeline->fence_context, ce->ring->head, ce->ring->tail); i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ @@ -98,7 +99,8 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", + GEM_TRACE("[%s] %s context:%llx retire\n", + dev_name(ce->engine->i915->drm.dev), ce->engine->name, ce->timeline->fence_context); ce->ops->unpin(ce); @@ -141,7 +143,8 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", + GEM_TRACE("[%s] %s context:%llx retire\n", + dev_name(ce->engine->i915->drm.dev), ce->engine->name, ce->timeline->fence_context); set_bit(CONTEXT_VALID_BIT, &ce->flags); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 49473c25916c..a25947e8026f 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -912,7 +912,7 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) if (INTEL_GEN(engine->i915) < 3) return -ENODEV; - GEM_TRACE("%s\n", engine->name); + GEM_TRACE("[%s] %s\n",dev_name(engine->i915->drm.dev), engine->name); intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); @@ -921,7 +921,8 @@ int intel_engine_stop_cs(struct intel_engine_cs *engine) mode, MODE_IDLE, MODE_IDLE, 1000, stop_timeout(engine), NULL)) { - GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name); + GEM_TRACE("[%s] %s: timed out on STOP_RING -> IDLE\n", + dev_name(engine->i915->drm.dev), engine->name); err = -ETIMEDOUT; } @@ -933,7 +934,7 @@ int intel_engine_stop_cs(struct intel_engine_cs
Re: [Intel-gfx] [PATCH] drm/i915/gt: Mark up ips_mchdev pointer access
On 19/12/11 10:50, Chris Wilson wrote: > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24: error: incompatible types in > comparison expression (different address spaces): > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24:struct drm_i915_private > [noderef] * > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24:struct drm_i915_private * > > Signed-off-by: Chris Wilson > --- > drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c > b/drivers/gpu/drm/i915/gt/intel_rps.c > index fd01e4100fc1..106c9fce9d6c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > @@ -1723,7 +1723,7 @@ void intel_rps_driver_register(struct intel_rps *rps) > > void intel_rps_driver_unregister(struct intel_rps *rps) > { > - if (ips_mchdev == rps_to_i915(rps)) > + if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) > rcu_assign_pointer(ips_mchdev, NULL); > } My bad, I missed to spot this in previous patch. Can we squash this with the other patch, if it was not merged already? > > -- > 2.24.0 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 07/33] drm/i915/gt: Mark up ips_mchdev pointer access
On 19/12/12 02:04, Chris Wilson wrote: > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24: error: incompatible types in > comparison expression (different address spaces): > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24:struct drm_i915_private > [noderef] * > drivers/gpu/drm/i915/gt/intel_rps.c:1726:24:struct drm_i915_private * > > Signed-off-by: Chris Wilson > --- > drivers/gpu/drm/i915/gt/intel_rps.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c > b/drivers/gpu/drm/i915/gt/intel_rps.c > index fd01e4100fc1..106c9fce9d6c 100644 > --- a/drivers/gpu/drm/i915/gt/intel_rps.c > +++ b/drivers/gpu/drm/i915/gt/intel_rps.c > @@ -1723,7 +1723,7 @@ void intel_rps_driver_register(struct intel_rps *rps) > > void intel_rps_driver_unregister(struct intel_rps *rps) > { > - if (ips_mchdev == rps_to_i915(rps)) > + if (rcu_access_pointer(ips_mchdev) == rps_to_i915(rps)) > rcu_assign_pointer(ips_mchdev, NULL); > } Right way to access rcu pointer. Reviewed-by: Venkata Sandeep Dhanalakota > > -- > 2.24.0 > > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/intel-gfx ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
On 19/12/12 01:34, Lucas De Marchi wrote: > On Wed, Dec 11, 2019 at 11:35:21PM -0800, Venkata Sandeep Dhanalakota wrote: > > We do not require to register the sysctl paths per instance, > > so making registration global. > > > > v2: make sysctl path register and unregister function driver > >specific (Tvrtko and Lucas). > > > > Cc: Sudeep Dutt > > Cc: Rodrigo Vivi > > Cc: Daniel Vetter > > Cc: Chris Wilson > > Cc: Jani Nikula > > Signed-off-by: Venkata Sandeep Dhanalakota > > --- > > drivers/gpu/drm/i915/i915_pci.c| 6 ++ > > drivers/gpu/drm/i915/i915_perf.c | 19 --- > > drivers/gpu/drm/i915/i915_perf.h | 2 ++ > > drivers/gpu/drm/i915/i915_perf_types.h | 1 - > > 4 files changed, 24 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/i915_pci.c > > b/drivers/gpu/drm/i915/i915_pci.c > > index bba6b50e6beb..c5a2bb5e87fe 100644 > > --- a/drivers/gpu/drm/i915/i915_pci.c > > +++ b/drivers/gpu/drm/i915/i915_pci.c > > @@ -30,6 +30,7 @@ > > #include "display/intel_fbdev.h" > > > > #include "i915_drv.h" > > +#include "i915_perf.h" > > #include "i915_globals.h" > > #include "i915_selftest.h" > > > > @@ -1051,6 +1052,10 @@ static int __init i915_init(void) > > return 0; > > } > > > > + err = i915_perf_sysctl_register(); > > + if (err) > > + return err; > > + > > return pci_register_driver(&i915_pci_driver); > > } > > > > @@ -1059,6 +1064,7 @@ static void __exit i915_exit(void) > > if (!i915_pci_driver.driver.owner) > > return; > > > > + i915_perf_sysctl_unregister(); > > honoring the init order means to unregister this after > pci_unregister_driver() I think we should reverse the init order, because if we cannot register pci driver successfully then we dont need to register sysctl table. > > > pci_unregister_driver(&i915_pci_driver); > > i915_globals_exit(); > > } > > diff --git a/drivers/gpu/drm/i915/i915_perf.c > > b/drivers/gpu/drm/i915/i915_perf.c > > index 8d2e37949f46..f039beed1771 100644 > > --- a/drivers/gpu/drm/i915/i915_perf.c > > +++ b/drivers/gpu/drm/i915/i915_perf.c > > @@ -387,6 +387,8 @@ struct i915_oa_config_bo { > > struct i915_vma *vma; > > }; > > > > +static struct ctl_table_header *sysctl_header; > > + > > static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); > > > > void i915_oa_config_release(struct kref *ref) > > @@ -4345,7 +4347,6 @@ void i915_perf_init(struct drm_i915_private *i915) > > > > oa_sample_rate_hard_limit = 1000 * > > (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); > > - perf->sysctl_header = register_sysctl_table(dev_root); > > doc for this function also needs an update with > s/module load/module bind/ sure. > > > > > mutex_init(&perf->metrics_lock); > > idr_init(&perf->metrics_idr); > > @@ -4381,6 +4382,20 @@ static int destroy_config(int id, void *p, void > > *data) > > return 0; > > } > > > > +int i915_perf_sysctl_register(void) > > +{ > > + sysctl_header = register_sysctl_table(dev_root); > > + if (!sysctl_header) > > + return -ENOMEM; > > Not sure about this return code here. grepping other drivers, this seems > to be common, but checking register_sysctl_table() it can actually fail > for other reasons. > > The previous behavior was to ignore it and not fail the entire thing... > just living without this sysctl. I'd say to keep that behavior. > Sure, we could ignore the return code for now. > Lucas De Marchi > > > + > > + return 0; > > +} > > + > > +void i915_perf_sysctl_unregister(void) > > +{ > > + unregister_sysctl_table(sysctl_header); > > +} > > + > > /** > > * i915_perf_fini - Counter part to i915_perf_init() > > * @i915: i915 device instance > > @@ -4395,8 +4410,6 @@ void i915_perf_fini(struct drm_i915_private *i915) > > idr_for_each(&perf->metrics_idr, destroy_config, perf); > > idr_destroy(&perf->metrics_idr); > > > > - unregister_sysctl_table(perf->sysctl_header); > > - > > memset(&perf->ops, 0, sizeof(perf->ops)); > > perf->i915 = NULL; > > } > > diff --git a/drivers/gpu/drm/i915
[Intel-gfx] [PATCH 2/2] drm/i915: Introduce new macros for tracing
New macros ENGINE_TRACE(), CE_TRACE() and RQ_TRACE() are introduce to tag device name and engine name with contexts and requests tracing in i915. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Chris Wilson Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/gem/i915_gem_pm.c| 4 +- drivers/gpu/drm/i915/gt/intel_context.c | 9 +- drivers/gpu/drm/i915/gt/intel_context.h | 7 ++ drivers/gpu/drm/i915/gt/intel_engine.h| 8 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 6 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 15 +-- drivers/gpu/drm/i915/gt/intel_lrc.c | 106 -- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- .../gpu/drm/i915/gt/intel_ring_submission.c | 11 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +- drivers/gpu/drm/i915/i915_request.c | 23 +--- drivers/gpu/drm/i915/i915_request.h | 8 ++ 13 files changed, 105 insertions(+), 108 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..3671a4e7e1cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,7 +13,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); @@ -99,7 +99,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 61c39e943f69..2deaa93045fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -68,8 +68,7 @@ int __intel_context_do_pin(struct intel_context *ce) if (err) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", ce->ring->head, ce->ring->tail); i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ @@ -98,8 +97,7 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); ce->ops->unpin(ce); @@ -141,8 +139,7 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 68b3d317d959..13cc9e0c98db 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -15,6 +15,13 @@ #include "intel_ring_types.h" #include "intel_timeline_types.h" +#define CE_TRACE(ce__, fmt, ...) do { \ + typecheck(struct intel_context, *(ce__)); \ + ENGINE_TRACE((ce__)->engine, "context:%llx" fmt,\ +(ce__)->timeline->fence_context, \ +##__VA_ARGS__);\ +} while(0); + void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c294ea80605e..d9c7121fa09e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -29,6 +29,14 @@ struct intel_gt; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) +#define ENGINE_TRACE(e__, fmt, ...) do { \ + typecheck(struct intel_engine_cs, *(e__)); \ +
[Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
We do not require to register the sysctl paths per instance, so making registration global. v2: make sysctl path register and unregister function driver specific (Tvrtko and Lucas). Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_pci.c| 9 - drivers/gpu/drm/i915/i915_perf.c | 18 ++ drivers/gpu/drm/i915/i915_perf.h | 2 ++ drivers/gpu/drm/i915/i915_perf_types.h | 1 - 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index bba6b50e6beb..4b33128070da 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -30,6 +30,7 @@ #include "display/intel_fbdev.h" #include "i915_drv.h" +#include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1051,7 +1052,12 @@ static int __init i915_init(void) return 0; } - return pci_register_driver(&i915_pci_driver); + err = pci_register_driver(&i915_pci_driver); + if (err) + return err; + + i915_perf_sysctl_register(); + return 0; } static void __exit i915_exit(void) @@ -1059,6 +1065,7 @@ static void __exit i915_exit(void) if (!i915_pci_driver.driver.owner) return; + i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); i915_globals_exit(); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8d2e37949f46..4abd7623ef2d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -387,6 +387,8 @@ struct i915_oa_config_bo { struct i915_vma *vma; }; +static struct ctl_table_header *sysctl_header; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); void i915_oa_config_release(struct kref *ref) @@ -4228,7 +4230,7 @@ static struct ctl_table dev_root[] = { }; /** - * i915_perf_init - initialize i915-perf state on module load + * i915_perf_init - initialize i915-perf state on module bind * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. @@ -4345,7 +4347,6 @@ void i915_perf_init(struct drm_i915_private *i915) oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - perf->sysctl_header = register_sysctl_table(dev_root); mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); @@ -4381,6 +4382,17 @@ static int destroy_config(int id, void *p, void *data) return 0; } +void i915_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl_table(dev_root); +} + +void i915_perf_sysctl_unregister(void) +{ + if (sysctl_header) + unregister_sysctl_table(sysctl_header); +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @i915: i915 device instance @@ -4395,8 +4407,6 @@ void i915_perf_fini(struct drm_i915_private *i915) idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(perf->sysctl_header); - memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; } diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 4ceebce72060..882fdd0a7680 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,6 +23,8 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); +void i915_perf_sysctl_register(void); +void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..45e581455f5d 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -380,7 +380,6 @@ struct i915_perf { struct drm_i915_private *i915; struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; /* * Lock associated with adding/modifying/removing OA configs -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
We do not require to register the sysctl paths per instance, so making registration global. v2: make sysctl path register and unregister function driver specific (Tvrtko and Lucas). Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_pci.c| 9 - drivers/gpu/drm/i915/i915_perf.c | 18 ++ drivers/gpu/drm/i915/i915_perf.h | 2 ++ drivers/gpu/drm/i915/i915_perf_types.h | 1 - 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index bba6b50e6beb..4b33128070da 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -30,6 +30,7 @@ #include "display/intel_fbdev.h" #include "i915_drv.h" +#include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1051,7 +1052,12 @@ static int __init i915_init(void) return 0; } - return pci_register_driver(&i915_pci_driver); + err = pci_register_driver(&i915_pci_driver); + if (err) + return err; + + i915_perf_sysctl_register(); + return 0; } static void __exit i915_exit(void) @@ -1059,6 +1065,7 @@ static void __exit i915_exit(void) if (!i915_pci_driver.driver.owner) return; + i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); i915_globals_exit(); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8d2e37949f46..4abd7623ef2d 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -387,6 +387,8 @@ struct i915_oa_config_bo { struct i915_vma *vma; }; +static struct ctl_table_header *sysctl_header; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); void i915_oa_config_release(struct kref *ref) @@ -4228,7 +4230,7 @@ static struct ctl_table dev_root[] = { }; /** - * i915_perf_init - initialize i915-perf state on module load + * i915_perf_init - initialize i915-perf state on module bind * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. @@ -4345,7 +4347,6 @@ void i915_perf_init(struct drm_i915_private *i915) oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - perf->sysctl_header = register_sysctl_table(dev_root); mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); @@ -4381,6 +4382,17 @@ static int destroy_config(int id, void *p, void *data) return 0; } +void i915_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl_table(dev_root); +} + +void i915_perf_sysctl_unregister(void) +{ + if (sysctl_header) + unregister_sysctl_table(sysctl_header); +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @i915: i915 device instance @@ -4395,8 +4407,6 @@ void i915_perf_fini(struct drm_i915_private *i915) idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(perf->sysctl_header); - memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; } diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 4ceebce72060..882fdd0a7680 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,6 +23,8 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); +void i915_perf_sysctl_register(void); +void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..45e581455f5d 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -380,7 +380,6 @@ struct i915_perf { struct drm_i915_private *i915; struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; /* * Lock associated with adding/modifying/removing OA configs -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915: Introduce new macros for tracing
New macros ENGINE_TRACE(), CE_TRACE(), RQ_TRACE() and GT_TRACE() are introduce to tag device name and engine name with contexts and requests tracing in i915. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Chris Wilson Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/gem/i915_gem_pm.c| 4 +- drivers/gpu/drm/i915/gt/intel_context.c | 9 +- drivers/gpu/drm/i915/gt/intel_context.h | 7 ++ drivers/gpu/drm/i915/gt/intel_engine.h| 8 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 6 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 15 ++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 6 + drivers/gpu/drm/i915/gt/intel_lrc.c | 106 -- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- .../gpu/drm/i915/gt/intel_ring_submission.c | 11 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +- drivers/gpu/drm/i915/i915_request.c | 23 +--- drivers/gpu/drm/i915/i915_request.h | 8 ++ 14 files changed, 110 insertions(+), 109 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..3671a4e7e1cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,7 +13,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); @@ -99,7 +99,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 61c39e943f69..2deaa93045fe 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -68,8 +68,7 @@ int __intel_context_do_pin(struct intel_context *ce) if (err) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", ce->ring->head, ce->ring->tail); i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ @@ -98,8 +97,7 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); ce->ops->unpin(ce); @@ -141,8 +139,7 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 68b3d317d959..13cc9e0c98db 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -15,6 +15,13 @@ #include "intel_ring_types.h" #include "intel_timeline_types.h" +#define CE_TRACE(ce__, fmt, ...) do { \ + typecheck(struct intel_context, *(ce__)); \ + ENGINE_TRACE((ce__)->engine, "context:%llx" fmt,\ +(ce__)->timeline->fence_context, \ +##__VA_ARGS__);\ +} while(0); + void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c294ea80605e..d9c7121fa09e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -29,6 +29,14 @@ struct intel_gt; #define CACHELINE_BYTES 64 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(u32)) +#define ENGINE_TRACE(e__, fmt, ...) do { \ +
[Intel-gfx] [PATCH 2/2] drm/i915: Introduce new macros for tracing
New macros ENGINE_TRACE(), CE_TRACE(), RQ_TRACE() and GT_TRACE() are introduce to tag device name and engine name with contexts and requests tracing in i915. v2: Addressed CI checkpatch issues. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/gem/i915_gem_pm.c| 4 +- drivers/gpu/drm/i915/gt/intel_context.c | 11 +- drivers/gpu/drm/i915/gt/intel_context.h | 7 ++ drivers/gpu/drm/i915/gt/intel_engine.h| 8 ++ drivers/gpu/drm/i915/gt/intel_engine_cs.c | 6 +- drivers/gpu/drm/i915/gt/intel_engine_pm.c | 6 +- drivers/gpu/drm/i915/gt/intel_gt_pm.c | 15 ++- drivers/gpu/drm/i915/gt/intel_gt_pm.h | 6 + drivers/gpu/drm/i915/gt/intel_lrc.c | 106 -- drivers/gpu/drm/i915/gt/intel_reset.c | 2 +- .../gpu/drm/i915/gt/intel_ring_submission.c | 11 +- .../gpu/drm/i915/gt/uc/intel_guc_submission.c | 8 +- drivers/gpu/drm/i915/i915_request.c | 23 +--- drivers/gpu/drm/i915/i915_request.h | 8 ++ 14 files changed, 111 insertions(+), 110 deletions(-) diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c index f88ee1317bb4..3671a4e7e1cb 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c @@ -13,7 +13,7 @@ void i915_gem_suspend(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_wakeref_auto(&i915->ggtt.userfault_wakeref, 0); flush_workqueue(i915->wq); @@ -99,7 +99,7 @@ void i915_gem_suspend_late(struct drm_i915_private *i915) void i915_gem_resume(struct drm_i915_private *i915) { - GEM_TRACE("\n"); + GEM_TRACE("%s\n", dev_name(i915->drm.dev)); intel_uncore_forcewake_get(&i915->uncore, FORCEWAKE_ALL); diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c index 61c39e943f69..b1e346d2d35f 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.c +++ b/drivers/gpu/drm/i915/gt/intel_context.c @@ -68,9 +68,8 @@ int __intel_context_do_pin(struct intel_context *ce) if (err) goto err; - GEM_TRACE("%s context:%llx pin ring:{head:%04x, tail:%04x}\n", - ce->engine->name, ce->timeline->fence_context, - ce->ring->head, ce->ring->tail); + CE_TRACE(ce, "pin ring:{head:%04x, tail:%04x}\n", +ce->ring->head, ce->ring->tail); i915_gem_context_get(ce->gem_context); /* for ctx->ppgtt */ @@ -98,8 +97,7 @@ void intel_context_unpin(struct intel_context *ce) mutex_lock_nested(&ce->pin_mutex, SINGLE_DEPTH_NESTING); if (likely(atomic_dec_and_test(&ce->pin_count))) { - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); ce->ops->unpin(ce); @@ -141,8 +139,7 @@ static void __intel_context_retire(struct i915_active *active) { struct intel_context *ce = container_of(active, typeof(*ce), active); - GEM_TRACE("%s context:%llx retire\n", - ce->engine->name, ce->timeline->fence_context); + CE_TRACE(ce, "retire\n"); set_bit(CONTEXT_VALID_BIT, &ce->flags); if (ce->state) diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index 68b3d317d959..29771dd78e4a 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -15,6 +15,13 @@ #include "intel_ring_types.h" #include "intel_timeline_types.h" +#define CE_TRACE((ce__), fmt, ...) do { \ + typecheck(struct intel_context, *(ce__)); \ + ENGINE_TRACE((ce__)->engine, "context:%llx" fmt,\ +(ce__)->timeline->fence_context, \ +##__VA_ARGS__);\ +} while (0) + void intel_context_init(struct intel_context *ce, struct i915_gem_context *ctx, struct intel_engine_cs *engine); diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index c294ea80605e..230b02774a4e 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -29,6 +29,14 @@ struct intel_gt; #define
[Intel-gfx] [PATCH 1/2] drm/i915/perf: Register sysctl path globally
We do not require to register the sysctl paths per instance, so making registration global. v2: make sysctl path register and unregister function driver specific (Tvrtko and Lucas). v3: remove the NULL-check as unregister_sysctl_table is null safe. (Chris and Lucas) Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Reviewed-by: Chris Wilson Reviewed-by: Lucas De Marchi Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/i915_pci.c| 9 - drivers/gpu/drm/i915/i915_perf.c | 17 + drivers/gpu/drm/i915/i915_perf.h | 2 ++ drivers/gpu/drm/i915/i915_perf_types.h | 1 - 4 files changed, 23 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 877560b1031e..9571611b4b16 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -30,6 +30,7 @@ #include "display/intel_fbdev.h" #include "i915_drv.h" +#include "i915_perf.h" #include "i915_globals.h" #include "i915_selftest.h" @@ -1053,7 +1054,12 @@ static int __init i915_init(void) return 0; } - return pci_register_driver(&i915_pci_driver); + err = pci_register_driver(&i915_pci_driver); + if (err) + return err; + + i915_perf_sysctl_register(); + return 0; } static void __exit i915_exit(void) @@ -1061,6 +1067,7 @@ static void __exit i915_exit(void) if (!i915_pci_driver.driver.owner) return; + i915_perf_sysctl_unregister(); pci_unregister_driver(&i915_pci_driver); i915_globals_exit(); } diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8d2e37949f46..3c163a9d69a9 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -387,6 +387,8 @@ struct i915_oa_config_bo { struct i915_vma *vma; }; +static struct ctl_table_header *sysctl_header; + static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); void i915_oa_config_release(struct kref *ref) @@ -4228,7 +4230,7 @@ static struct ctl_table dev_root[] = { }; /** - * i915_perf_init - initialize i915-perf state on module load + * i915_perf_init - initialize i915-perf state on module bind * @i915: i915 device instance * * Initializes i915-perf state without exposing anything to userspace. @@ -4345,7 +4347,6 @@ void i915_perf_init(struct drm_i915_private *i915) oa_sample_rate_hard_limit = 1000 * (RUNTIME_INFO(i915)->cs_timestamp_frequency_khz / 2); - perf->sysctl_header = register_sysctl_table(dev_root); mutex_init(&perf->metrics_lock); idr_init(&perf->metrics_idr); @@ -4381,6 +4382,16 @@ static int destroy_config(int id, void *p, void *data) return 0; } +void i915_perf_sysctl_register(void) +{ + sysctl_header = register_sysctl_table(dev_root); +} + +void i915_perf_sysctl_unregister(void) +{ + unregister_sysctl_table(sysctl_header); +} + /** * i915_perf_fini - Counter part to i915_perf_init() * @i915: i915 device instance @@ -4395,8 +4406,6 @@ void i915_perf_fini(struct drm_i915_private *i915) idr_for_each(&perf->metrics_idr, destroy_config, perf); idr_destroy(&perf->metrics_idr); - unregister_sysctl_table(perf->sysctl_header); - memset(&perf->ops, 0, sizeof(perf->ops)); perf->i915 = NULL; } diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h index 4ceebce72060..882fdd0a7680 100644 --- a/drivers/gpu/drm/i915/i915_perf.h +++ b/drivers/gpu/drm/i915/i915_perf.h @@ -23,6 +23,8 @@ void i915_perf_fini(struct drm_i915_private *i915); void i915_perf_register(struct drm_i915_private *i915); void i915_perf_unregister(struct drm_i915_private *i915); int i915_perf_ioctl_version(void); +void i915_perf_sysctl_register(void); +void i915_perf_sysctl_unregister(void); int i915_perf_open_ioctl(struct drm_device *dev, void *data, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h index 74ddc20a0d37..45e581455f5d 100644 --- a/drivers/gpu/drm/i915/i915_perf_types.h +++ b/drivers/gpu/drm/i915/i915_perf_types.h @@ -380,7 +380,6 @@ struct i915_perf { struct drm_i915_private *i915; struct kobject *metrics_kobj; - struct ctl_table_header *sysctl_header; /* * Lock associated with adding/modifying/removing OA configs -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] drm/i915: Fix typecheck macro in GT_TRACE
typecheck() macro creates an huge stack size causing issues with static analysis with coverity, addressing this with creating a local pointer. Fixes: 639f2f24895f ("drm/i915: Introduce new macros for tracing") Cc: Joonas Lahtinen Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota --- drivers/gpu/drm/i915/gt/intel_gt.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_gt.h b/drivers/gpu/drm/i915/gt/intel_gt.h index 9d9e8831daeb..2355cf129e9c 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.h +++ b/drivers/gpu/drm/i915/gt/intel_gt.h @@ -12,9 +12,9 @@ struct drm_i915_private; -#define GT_TRACE(gt__, fmt, ...) do { \ - typecheck(struct intel_gt, *(gt__));\ - GEM_TRACE("%s " fmt, dev_name(gt->i915->drm.dev), \ +#define GT_TRACE(gt, fmt, ...) do {\ + const struct intel_gt *gt__ __maybe_unused = (gt); \ + GEM_TRACE("%s " fmt, dev_name(gt__->i915->drm.dev),\ ##__VA_ARGS__); \ } while (0) -- 2.21.0.5.gaeb582a983 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx