Re: [PATCH] drm: fix deadlock of syncobj v6
Am 23.10.18 um 11:37 schrieb Chunming Zhou: > v2: > add a mutex between sync_cb execution and free. > v3: > clearly separating the roles for pt_lock and cb_mutex (Chris) > v4: > the cb_mutex should be taken outside of the pt_lock around > this if() block. (Chris) > v5: > fix a corner case > v6: > tidy drm_syncobj_fence_get_or_add_callback up. (Chris) > > Tested by syncobj_basic and syncobj_wait of igt. > > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > Cc: intel-...@lists.freedesktop.org > Reviewed-by: Chris Wilson I've gone ahead and pushed this to drm-misc-next. Regards, Christian. > --- > drivers/gpu/drm/drm_syncobj.c | 156 -- > include/drm/drm_syncobj.h | 8 +- > 2 files changed, 81 insertions(+), 83 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 57bf6006394d..b7eaa603f368 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -106,6 +106,42 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file > *file_private, > } > EXPORT_SYMBOL(drm_syncobj_find); > > +static struct dma_fence > +*drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj, > + uint64_t point) > +{ > + struct drm_syncobj_signal_pt *signal_pt; > + > + if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) && > + (point <= syncobj->timeline)) { > + struct drm_syncobj_stub_fence *fence = > + kzalloc(sizeof(struct drm_syncobj_stub_fence), > + GFP_KERNEL); > + > + if (!fence) > + return NULL; > + spin_lock_init(>lock); > + dma_fence_init(>base, > +_syncobj_stub_fence_ops, > +>lock, > +syncobj->timeline_context, > +point); > + > + dma_fence_signal(>base); > + return >base; > + } > + > + list_for_each_entry(signal_pt, >signal_pt_list, list) { > + if (point > signal_pt->value) > + continue; > + if ((syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) && > + (point != signal_pt->value)) > + continue; > + return dma_fence_get(_pt->fence_array->base); > + } > + return NULL; > +} > + > 
static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, > struct drm_syncobj_cb *cb, > drm_syncobj_func_t func) > @@ -114,115 +150,71 @@ static void drm_syncobj_add_callback_locked(struct > drm_syncobj *syncobj, > list_add_tail(>node, >cb_list); > } > > -static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > - struct dma_fence **fence, > - struct drm_syncobj_cb *cb, > - drm_syncobj_func_t func) > +static void drm_syncobj_fence_get_or_add_callback(struct drm_syncobj > *syncobj, > + struct dma_fence **fence, > + struct drm_syncobj_cb *cb, > + drm_syncobj_func_t func) > { > - int ret; > + u64 pt_value = 0; > > - ret = drm_syncobj_search_fence(syncobj, 0, 0, fence); > - if (!ret) > - return 1; > + if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) { > + /*BINARY syncobj always wait on last pt */ > + pt_value = syncobj->signal_point; > > - spin_lock(>lock); > - /* We've already tried once to get a fence and failed. Now that we > - * have the lock, try one more time just to be sure we don't add a > - * callback when a fence has already been set. > - */ > - if (!list_empty(>signal_pt_list)) { > - spin_unlock(>lock); > - drm_syncobj_search_fence(syncobj, 0, 0, fence); > - if (*fence) > - return 1; > - spin_lock(>lock); > - } else { > - *fence = NULL; > - drm_syncobj_add_callback_locked(syncobj, cb, func); > - ret = 0; > + if (pt_value == 0) > + pt_value += DRM_SYNCOBJ_BINARY_POINT; > } > - spin_unlock(>lock); > > - return ret; > + mutex_lock(>cb_mutex); > + spin_lock(>pt_lock); > + *fence = drm_syncobj_find_signal_pt_for_point(syncobj, pt_value); > + spin_unlock(>pt_lock); > + if (!*fence) > + drm_syncobj_add_callback_locked(syncobj, cb, func); > + mutex_unlock(>cb_mutex); > } > > void drm_syncobj_add_callback(struct drm_syncobj *syncobj, > struct drm_syncobj_cb *cb, >
[PATCH] drm: fix deadlock of syncobj v6
v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) v5: fix a corner case v6: tidy drm_syncobj_fence_get_or_add_callback up. (Chris) Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König Cc: intel-...@lists.freedesktop.org Reviewed-by: Chris Wilson --- drivers/gpu/drm/drm_syncobj.c | 156 -- include/drm/drm_syncobj.h | 8 +- 2 files changed, 81 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..b7eaa603f368 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -106,6 +106,42 @@ struct drm_syncobj *drm_syncobj_find(struct drm_file *file_private, } EXPORT_SYMBOL(drm_syncobj_find); +static struct dma_fence +*drm_syncobj_find_signal_pt_for_point(struct drm_syncobj *syncobj, + uint64_t point) +{ + struct drm_syncobj_signal_pt *signal_pt; + + if ((syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) && + (point <= syncobj->timeline)) { + struct drm_syncobj_stub_fence *fence = + kzalloc(sizeof(struct drm_syncobj_stub_fence), + GFP_KERNEL); + + if (!fence) + return NULL; + spin_lock_init(>lock); + dma_fence_init(>base, + _syncobj_stub_fence_ops, + >lock, + syncobj->timeline_context, + point); + + dma_fence_signal(>base); + return >base; + } + + list_for_each_entry(signal_pt, >signal_pt_list, list) { + if (point > signal_pt->value) + continue; + if ((syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) && + (point != signal_pt->value)) + continue; + return dma_fence_get(_pt->fence_array->base); + } + return NULL; +} + static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) @@ -114,115 +150,71 @@ static void drm_syncobj_add_callback_locked(struct drm_syncobj *syncobj, list_add_tail(>node, >cb_list); 
} -static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, -struct dma_fence **fence, -struct drm_syncobj_cb *cb, -drm_syncobj_func_t func) +static void drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, + struct dma_fence **fence, + struct drm_syncobj_cb *cb, + drm_syncobj_func_t func) { - int ret; + u64 pt_value = 0; - ret = drm_syncobj_search_fence(syncobj, 0, 0, fence); - if (!ret) - return 1; + if (syncobj->type == DRM_SYNCOBJ_TYPE_BINARY) { + /*BINARY syncobj always wait on last pt */ + pt_value = syncobj->signal_point; - spin_lock(>lock); - /* We've already tried once to get a fence and failed. Now that we -* have the lock, try one more time just to be sure we don't add a -* callback when a fence has already been set. -*/ - if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); - drm_syncobj_search_fence(syncobj, 0, 0, fence); - if (*fence) - return 1; - spin_lock(>lock); - } else { - *fence = NULL; - drm_syncobj_add_callback_locked(syncobj, cb, func); - ret = 0; + if (pt_value == 0) + pt_value += DRM_SYNCOBJ_BINARY_POINT; } - spin_unlock(>lock); - return ret; + mutex_lock(>cb_mutex); + spin_lock(>pt_lock); + *fence = drm_syncobj_find_signal_pt_for_point(syncobj, pt_value); + spin_unlock(>pt_lock); + if (!*fence) + drm_syncobj_add_callback_locked(syncobj, cb, func); + mutex_unlock(>cb_mutex); } void drm_syncobj_add_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) { - spin_lock(>lock); + mutex_lock(>cb_mutex); drm_syncobj_add_callback_locked(syncobj, cb, func); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } void
Re: [PATCH] drm: fix deadlock of syncobj v5
Am 23.10.18 um 11:11 schrieb Chris Wilson: > Quoting zhoucm1 (2018-10-23 10:09:01) >> >> On 2018年10月23日 17:01, Chris Wilson wrote: >>> Quoting Chunming Zhou (2018-10-23 08:57:54) v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) v5: fix a corner case Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König Cc: intel-...@lists.freedesktop.org Reviewed-by: Chris Wilson --- drivers/gpu/drm/drm_syncobj.c | 55 +++ include/drm/drm_syncobj.h | 8 +++-- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..679a56791e34 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + mutex_lock(>cb_mutex); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); >>> Hmm, just thinking of other ways of tidying this up >>> >>> mutex_lock(cb_lock); >>> spin_lock(pt_lock); >>> *fence = drm_syncobj_find_signal_pt_for_point(); >>> spin_unlock(pt_list); >>> if (*!fence) >>>drm_syncobj_add_callback_locked(syncobj, cb, func); >>> mutex_unlock(cb_lock); >>> >>> i.e. get rid of the early return and we can even drop the int return here >>> as it is unimportant and unused. >> Yes, do you need I send v6? or you make a separate patch as a improvment? 
> Send it in reply, we still have some time before the shards catch up > with the ml ;) I'm idle anyway because I've locked myself out of the AMD VPN accidentally. So just send me a ping when the v6 is ready to be committed and I can push it to drm-misc-next. Christian. > -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v5
Quoting zhoucm1 (2018-10-23 10:09:01) > > > On 2018年10月23日 17:01, Chris Wilson wrote: > > Quoting Chunming Zhou (2018-10-23 08:57:54) > >> v2: > >> add a mutex between sync_cb execution and free. > >> v3: > >> clearly separating the roles for pt_lock and cb_mutex (Chris) > >> v4: > >> the cb_mutex should be taken outside of the pt_lock around this if() > >> block. (Chris) > >> v5: > >> fix a corner case > >> > >> Tested by syncobj_basic and syncobj_wait of igt. > >> > >> Signed-off-by: Chunming Zhou > >> Cc: Daniel Vetter > >> Cc: Chris Wilson > >> Cc: Christian König > >> Cc: intel-...@lists.freedesktop.org > >> Reviewed-by: Chris Wilson > >> --- > >> drivers/gpu/drm/drm_syncobj.c | 55 +++ > >> include/drm/drm_syncobj.h | 8 +++-- > >> 2 files changed, 36 insertions(+), 27 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > >> index 57bf6006394d..679a56791e34 100644 > >> --- a/drivers/gpu/drm/drm_syncobj.c > >> +++ b/drivers/gpu/drm/drm_syncobj.c > >> @@ -125,23 +125,26 @@ static int > >> drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > >> if (!ret) > >> return 1; > >> > >> - spin_lock(>lock); > >> + mutex_lock(>cb_mutex); > >> /* We've already tried once to get a fence and failed. Now that > >> we > >> * have the lock, try one more time just to be sure we don't add a > >> * callback when a fence has already been set. > >> */ > >> + spin_lock(>pt_lock); > >> if (!list_empty(>signal_pt_list)) { > >> - spin_unlock(>lock); > >> + spin_unlock(>pt_lock); > >> drm_syncobj_search_fence(syncobj, 0, 0, fence); > > Hmm, just thinking of other ways of tidying this up > > > > mutex_lock(cb_lock); > > spin_lock(pt_lock); > > *fence = drm_syncobj_find_signal_pt_for_point(); > > spin_unlock(pt_list); > > if (*!fence) > > drm_syncobj_add_callback_locked(syncobj, cb, func); > > mutex_unlock(cb_lock); > > > > i.e. 
get rid of the early return and we can even drop the int return here > > as it is unimportant and unused. > Yes, do you need I send v6? or you make a separate patch as a improvment? Send it in reply, we still have some time before the shards catch up with the ml ;) -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v5
On 2018年10月23日 17:01, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-23 08:57:54) v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) v5: fix a corner case Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König Cc: intel-...@lists.freedesktop.org Reviewed-by: Chris Wilson --- drivers/gpu/drm/drm_syncobj.c | 55 +++ include/drm/drm_syncobj.h | 8 +++-- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..679a56791e34 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + mutex_lock(>cb_mutex); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); Hmm, just thinking of other ways of tidying this up mutex_lock(cb_lock); spin_lock(pt_lock); *fence = drm_syncobj_find_signal_pt_for_point(); spin_unlock(pt_list); if (*!fence) drm_syncobj_add_callback_locked(syncobj, cb, func); mutex_unlock(cb_lock); i.e. get rid of the early return and we can even drop the int return here as it is unimportant and unused. Yes, do you need I send v6? or you make a separate patch as a improvment? Thanks, David Zhou -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v5
Quoting Chunming Zhou (2018-10-23 08:57:54) > v2: > add a mutex between sync_cb execution and free. > v3: > clearly separating the roles for pt_lock and cb_mutex (Chris) > v4: > the cb_mutex should be taken outside of the pt_lock around this if() block. > (Chris) > v5: > fix a corner case > > Tested by syncobj_basic and syncobj_wait of igt. > > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > Cc: intel-...@lists.freedesktop.org > Reviewed-by: Chris Wilson > --- > drivers/gpu/drm/drm_syncobj.c | 55 +++ > include/drm/drm_syncobj.h | 8 +++-- > 2 files changed, 36 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 57bf6006394d..679a56791e34 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct > drm_syncobj *syncobj, > if (!ret) > return 1; > > - spin_lock(>lock); > + mutex_lock(>cb_mutex); > /* We've already tried once to get a fence and failed. Now that we > * have the lock, try one more time just to be sure we don't add a > * callback when a fence has already been set. > */ > + spin_lock(>pt_lock); > if (!list_empty(>signal_pt_list)) { > - spin_unlock(>lock); > + spin_unlock(>pt_lock); > drm_syncobj_search_fence(syncobj, 0, 0, fence); Hmm, just thinking of other ways of tidying this up mutex_lock(cb_lock); spin_lock(pt_lock); *fence = drm_syncobj_find_signal_pt_for_point(); spin_unlock(pt_list); if (*!fence) drm_syncobj_add_callback_locked(syncobj, cb, func); mutex_unlock(cb_lock); i.e. get rid of the early return and we can even drop the int return here as it is unimportant and unused. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v4
Quoting zhoucm1 (2018-10-23 09:00:26) > > > On 2018年10月23日 15:51, Chris Wilson wrote: > > Quoting Chunming Zhou (2018-10-23 02:50:08) > >> v2: > >> add a mutex between sync_cb execution and free. > >> v3: > >> clearly separating the roles for pt_lock and cb_mutex (Chris) > >> v4: > >> the cb_mutex should be taken outside of the pt_lock around this if() > >> block. (Chris) > >> > >> Tested by syncobj_basic and syncobj_wait of igt. > >> > >> Signed-off-by: Chunming Zhou > >> Cc: Daniel Vetter > >> Cc: Chris Wilson > >> Cc: Christian König > >> --- > >> drivers/gpu/drm/drm_syncobj.c | 51 ++- > >> include/drm/drm_syncobj.h | 8 -- > >> 2 files changed, 33 insertions(+), 26 deletions(-) > >> > >> diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > >> index 57bf6006394d..315f08132f6d 100644 > >> --- a/drivers/gpu/drm/drm_syncobj.c > >> +++ b/drivers/gpu/drm/drm_syncobj.c > >> @@ -125,23 +125,24 @@ static int > >> drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, > >> if (!ret) > >> return 1; > >> > >> - spin_lock(>lock); > >> + mutex_lock(>cb_mutex); > >> /* We've already tried once to get a fence and failed. Now that > >> we > >> * have the lock, try one more time just to be sure we don't add a > >> * callback when a fence has already been set. > >> */ > >> + spin_lock(>pt_lock); > >> if (!list_empty(>signal_pt_list)) { > >> - spin_unlock(>lock); > >> + spin_unlock(>pt_lock); > >> drm_syncobj_search_fence(syncobj, 0, 0, fence); > >> if (*fence) > > mutex_unlock(>cb_mutex); > fixed. > > > > > With that, > > Reviewed-by: Chris Wilson > Thanks > > > > > Can you please resend with Cc: intel-...@lists.freedesktop.org so we can > > double check the fix. > resent, Could you help to submit patch to drm-misc? It should have time on the shards this afternoon, so all going to plan, yup. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v4
On 2018年10月23日 15:51, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-23 02:50:08) v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 51 ++- include/drm/drm_syncobj.h | 8 -- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..315f08132f6d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + mutex_lock(>cb_mutex); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); if (*fence) mutex_unlock(>cb_mutex); fixed. With that, Reviewed-by: Chris Wilson Thanks Can you please resend with Cc: intel-...@lists.freedesktop.org so we can double check the fix. resent, Could you help to submit patch to drm-misc? Thanks, David Zhou -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm: fix deadlock of syncobj v5
v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) v5: fix a corner case Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König Cc: intel-...@lists.freedesktop.org Reviewed-by: Chris Wilson --- drivers/gpu/drm/drm_syncobj.c | 55 +++ include/drm/drm_syncobj.h | 8 +++-- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..679a56791e34 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,26 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + mutex_lock(>cb_mutex); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. 
*/ + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); - if (*fence) + if (*fence) { + mutex_unlock(>cb_mutex); return 1; - spin_lock(>lock); + } } else { + spin_unlock(>pt_lock); *fence = NULL; drm_syncobj_add_callback_locked(syncobj, cb, func); ret = 0; } - spin_unlock(>lock); + mutex_unlock(>cb_mutex); return ret; } @@ -150,43 +153,43 @@ void drm_syncobj_add_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) { - spin_lock(>lock); + mutex_lock(>cb_mutex); drm_syncobj_add_callback_locked(syncobj, cb, func); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb) { - spin_lock(>lock); + mutex_lock(>cb_mutex); list_del_init(>node); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } static void drm_syncobj_init(struct drm_syncobj *syncobj) { - spin_lock(>lock); + spin_lock(>pt_lock); syncobj->timeline_context = dma_fence_context_alloc(1); syncobj->timeline = 0; syncobj->signal_point = 0; init_waitqueue_head(>wq); INIT_LIST_HEAD(>signal_pt_list); - spin_unlock(>lock); + spin_unlock(>pt_lock); } static void drm_syncobj_fini(struct drm_syncobj *syncobj) { struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; - spin_lock(>lock); + spin_lock(>pt_lock); list_for_each_entry_safe(signal_pt, tmp, >signal_pt_list, list) { list_del(_pt->list); dma_fence_put(_pt->fence_array->base); kfree(signal_pt); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } static struct dma_fence @@ -249,14 +252,14 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, fences[num_fences++] = dma_fence_get(fence); /* timeline syncobj must take this dependency */ if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { - spin_lock(>lock); + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { tail_pt = list_last_entry(>signal_pt_list, struct drm_syncobj_signal_pt, 
list); fences[num_fences++] = dma_fence_get(_pt->fence_array->base); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } signal_pt->fence_array = dma_fence_array_create(num_fences, fences, syncobj->timeline_context, @@ -266,16 +269,16 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, goto fail; } - spin_lock(>lock); + spin_lock(>pt_lock); if (syncobj->signal_point >= point) { DRM_WARN("A later signal is ready!"); - spin_unlock(>lock); + spin_unlock(>pt_lock); goto exist; } signal_pt->value = point; list_add_tail(_pt->list, >signal_pt_list); syncobj->signal_point = point; - spin_unlock(>lock); + spin_unlock(>pt_lock); wake_up_all(>wq); return 0;
Re: [PATCH] drm: fix deadlock of syncobj v4
Quoting Chunming Zhou (2018-10-23 02:50:08) > v2: > add a mutex between sync_cb execution and free. > v3: > clearly separating the roles for pt_lock and cb_mutex (Chris) > v4: > the cb_mutex should be taken outside of the pt_lock around this if() block. > (Chris) > > Tested by syncobj_basic and syncobj_wait of igt. > > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > --- > drivers/gpu/drm/drm_syncobj.c | 51 ++- > include/drm/drm_syncobj.h | 8 -- > 2 files changed, 33 insertions(+), 26 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 57bf6006394d..315f08132f6d 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct > drm_syncobj *syncobj, > if (!ret) > return 1; > > - spin_lock(>lock); > + mutex_lock(>cb_mutex); > /* We've already tried once to get a fence and failed. Now that we > * have the lock, try one more time just to be sure we don't add a > * callback when a fence has already been set. > */ > + spin_lock(>pt_lock); > if (!list_empty(>signal_pt_list)) { > - spin_unlock(>lock); > + spin_unlock(>pt_lock); > drm_syncobj_search_fence(syncobj, 0, 0, fence); > if (*fence) mutex_unlock(>cb_mutex); With that, Reviewed-by: Chris Wilson Can you please resend with Cc: intel-...@lists.freedesktop.org so we can double check the fix. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm: fix deadlock of syncobj v4
v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) v4: the cb_mutex should be taken outside of the pt_lock around this if() block. (Chris) Tested by syncobj_basic and syncobj_wait of igt. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 51 ++- include/drm/drm_syncobj.h | 8 -- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..315f08132f6d 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + mutex_lock(>cb_mutex); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); if (*fence) return 1; - spin_lock(>lock); } else { + spin_unlock(>pt_lock); *fence = NULL; drm_syncobj_add_callback_locked(syncobj, cb, func); ret = 0; } - spin_unlock(>lock); + mutex_unlock(>cb_mutex); return ret; } @@ -150,43 +151,43 @@ void drm_syncobj_add_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) { - spin_lock(>lock); + mutex_lock(>cb_mutex); drm_syncobj_add_callback_locked(syncobj, cb, func); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb) { - spin_lock(>lock); + mutex_lock(>cb_mutex); list_del_init(>node); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } static void drm_syncobj_init(struct drm_syncobj *syncobj) { - spin_lock(>lock); + spin_lock(>pt_lock); syncobj->timeline_context = 
dma_fence_context_alloc(1); syncobj->timeline = 0; syncobj->signal_point = 0; init_waitqueue_head(>wq); INIT_LIST_HEAD(>signal_pt_list); - spin_unlock(>lock); + spin_unlock(>pt_lock); } static void drm_syncobj_fini(struct drm_syncobj *syncobj) { struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; - spin_lock(>lock); + spin_lock(>pt_lock); list_for_each_entry_safe(signal_pt, tmp, >signal_pt_list, list) { list_del(_pt->list); dma_fence_put(_pt->fence_array->base); kfree(signal_pt); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } static struct dma_fence @@ -249,14 +250,14 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, fences[num_fences++] = dma_fence_get(fence); /* timeline syncobj must take this dependency */ if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { - spin_lock(>lock); + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { tail_pt = list_last_entry(>signal_pt_list, struct drm_syncobj_signal_pt, list); fences[num_fences++] = dma_fence_get(_pt->fence_array->base); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } signal_pt->fence_array = dma_fence_array_create(num_fences, fences, syncobj->timeline_context, @@ -266,16 +267,16 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, goto fail; } - spin_lock(>lock); + spin_lock(>pt_lock); if (syncobj->signal_point >= point) { DRM_WARN("A later signal is ready!"); - spin_unlock(>lock); + spin_unlock(>pt_lock); goto exist; } signal_pt->value = point; list_add_tail(_pt->list, >signal_pt_list); syncobj->signal_point = point; - spin_unlock(>lock); + spin_unlock(>pt_lock); wake_up_all(>wq); return 0; @@ -294,7 +295,7 @@ static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj) { struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt; -
Re: [PATCH] drm: fix deadlock of syncobj v3
Quoting Chunming Zhou (2018-10-22 10:08:59) > v2: > add a mutex between sync_cb execution and free. > v3: > clearly separating the roles for pt_lock and cb_mutex (Chris) > > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > --- > drivers/gpu/drm/drm_syncobj.c | 51 ++- > include/drm/drm_syncobj.h | 8 -- > 2 files changed, 33 insertions(+), 26 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 57bf6006394d..827d2ea5bb79 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct > drm_syncobj *syncobj, > if (!ret) > return 1; > > - spin_lock(>lock); > + spin_lock(>pt_lock); > /* We've already tried once to get a fence and failed. Now that we > * have the lock, try one more time just to be sure we don't add a > * callback when a fence has already been set. > */ > if (!list_empty(>signal_pt_list)) { > - spin_unlock(>lock); > + spin_unlock(>pt_lock); > drm_syncobj_search_fence(syncobj, 0, 0, fence); > if (*fence) > return 1; > - spin_lock(>lock); > } else { > + spin_unlock(>pt_lock); > *fence = NULL; > + mutex_lock(>cb_mutex); > drm_syncobj_add_callback_locked(syncobj, cb, func); > + mutex_unlock(>cb_mutex); There's no guard should the fence be installed after our check but before we add the callback. Aiui, the cb_mutex should be taken outside of the pt_lock around this if() block. > ret = 0; > } > - spin_unlock(>lock); > > return ret; > } ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm: fix deadlock of syncobj v3
v2: add a mutex between sync_cb execution and free. v3: clearly separating the roles for pt_lock and cb_mutex (Chris) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 51 ++- include/drm/drm_syncobj.h | 8 -- 2 files changed, 33 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..827d2ea5bb79 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -125,23 +125,24 @@ static int drm_syncobj_fence_get_or_add_callback(struct drm_syncobj *syncobj, if (!ret) return 1; - spin_lock(>lock); + spin_lock(>pt_lock); /* We've already tried once to get a fence and failed. Now that we * have the lock, try one more time just to be sure we don't add a * callback when a fence has already been set. */ if (!list_empty(>signal_pt_list)) { - spin_unlock(>lock); + spin_unlock(>pt_lock); drm_syncobj_search_fence(syncobj, 0, 0, fence); if (*fence) return 1; - spin_lock(>lock); } else { + spin_unlock(>pt_lock); *fence = NULL; + mutex_lock(>cb_mutex); drm_syncobj_add_callback_locked(syncobj, cb, func); + mutex_unlock(>cb_mutex); ret = 0; } - spin_unlock(>lock); return ret; } @@ -150,43 +151,43 @@ void drm_syncobj_add_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb, drm_syncobj_func_t func) { - spin_lock(>lock); + mutex_lock(>cb_mutex); drm_syncobj_add_callback_locked(syncobj, cb, func); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb) { - spin_lock(>lock); + mutex_lock(>cb_mutex); list_del_init(>node); - spin_unlock(>lock); + mutex_unlock(>cb_mutex); } static void drm_syncobj_init(struct drm_syncobj *syncobj) { - spin_lock(>lock); + spin_lock(>pt_lock); syncobj->timeline_context = dma_fence_context_alloc(1); syncobj->timeline = 0; syncobj->signal_point = 0; init_waitqueue_head(>wq); INIT_LIST_HEAD(>signal_pt_list); - 
spin_unlock(>lock); + spin_unlock(>pt_lock); } static void drm_syncobj_fini(struct drm_syncobj *syncobj) { struct drm_syncobj_signal_pt *signal_pt = NULL, *tmp; - spin_lock(>lock); + spin_lock(>pt_lock); list_for_each_entry_safe(signal_pt, tmp, >signal_pt_list, list) { list_del(_pt->list); dma_fence_put(_pt->fence_array->base); kfree(signal_pt); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } static struct dma_fence @@ -249,14 +250,14 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, fences[num_fences++] = dma_fence_get(fence); /* timeline syncobj must take this dependency */ if (syncobj->type == DRM_SYNCOBJ_TYPE_TIMELINE) { - spin_lock(>lock); + spin_lock(>pt_lock); if (!list_empty(>signal_pt_list)) { tail_pt = list_last_entry(>signal_pt_list, struct drm_syncobj_signal_pt, list); fences[num_fences++] = dma_fence_get(_pt->fence_array->base); } - spin_unlock(>lock); + spin_unlock(>pt_lock); } signal_pt->fence_array = dma_fence_array_create(num_fences, fences, syncobj->timeline_context, @@ -266,16 +267,16 @@ static int drm_syncobj_create_signal_pt(struct drm_syncobj *syncobj, goto fail; } - spin_lock(>lock); + spin_lock(>pt_lock); if (syncobj->signal_point >= point) { DRM_WARN("A later signal is ready!"); - spin_unlock(>lock); + spin_unlock(>pt_lock); goto exist; } signal_pt->value = point; list_add_tail(_pt->list, >signal_pt_list); syncobj->signal_point = point; - spin_unlock(>lock); + spin_unlock(>pt_lock); wake_up_all(>wq); return 0; @@ -294,7 +295,7 @@ static void drm_syncobj_garbage_collection(struct drm_syncobj *syncobj) { struct drm_syncobj_signal_pt *signal_pt, *tmp, *tail_pt; - spin_lock(>lock); + spin_lock(>pt_lock); tail_pt = list_last_entry(>signal_pt_list,
Re: [PATCH] drm: fix deadlock of syncobj v2
On 2018年10月22日 16:34, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-21 12:14:24) v2: add a mutex between sync_cb execution and free. The result would appear to be that syncobj->lock is relegated to protecting the pt_list and the mutex would only be needed for the syncobj->cb_list. The patch looks correct for resolving the deadlock and avoiding introducing any new fails, but I do wonder if spinlock_t pt_lock; struct mutex cb_mutex; and clearly separating the roles would not be a better approach. good idea, thanks, -David -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj v2
Quoting Chunming Zhou (2018-10-21 12:14:24) > v2: > add a mutex between sync_cb execution and free. The result would appear to be that syncobj->lock is relegated to protecting the pt_list and the mutex would only be needed for the syncobj->cb_list. The patch looks correct for resolving the deadlock and avoiding introducing any new fails, but I do wonder if spinlock_t pt_lock; struct mutex cb_mutex; and clearly separating the roles would not be a better approach. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
RE: [PATCH] drm: fix deadlock of syncobj v2
Ping... Btw: The patch is tested by syncobj_basic and syncobj_wait of IGT. > -Original Message- > From: Chunming Zhou > Sent: Sunday, October 21, 2018 7:14 PM > To: dri-devel@lists.freedesktop.org > Cc: Zhou, David(ChunMing) ; Daniel Vetter > ; Chris Wilson ; Koenig, Christian > > Subject: [PATCH] drm: fix deadlock of syncobj v2 > > v2: > add a mutex between sync_cb execution and free. > > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > --- > drivers/gpu/drm/drm_syncobj.c | 11 +-- > include/drm/drm_syncobj.h | 4 > 2 files changed, 13 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c > b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..c025a0b93565 > 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -158,9 +158,11 @@ void drm_syncobj_add_callback(struct drm_syncobj > *syncobj, void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, >struct drm_syncobj_cb *cb) > { > + mutex_lock(>mutex); > spin_lock(>lock); > list_del_init(>node); > spin_unlock(>lock); > + mutex_unlock(>mutex); > } > > static void drm_syncobj_init(struct drm_syncobj *syncobj) @@ -344,13 > +346,17 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, > drm_syncobj_create_signal_pt(syncobj, fence, pt_value); > if (fence) { > struct drm_syncobj_cb *cur, *tmp; > + LIST_HEAD(cb_list); > > + mutex_lock(>mutex); > spin_lock(>lock); > - list_for_each_entry_safe(cur, tmp, >cb_list, node) > { > + list_splice_init(>cb_list, _list); > + spin_unlock(>lock); > + list_for_each_entry_safe(cur, tmp, _list, node) { > list_del_init(>node); > cur->func(syncobj, cur); > } > - spin_unlock(>lock); > + mutex_unlock(>mutex); > } > } > EXPORT_SYMBOL(drm_syncobj_replace_fence); > @@ -501,6 +507,7 @@ int drm_syncobj_create(struct drm_syncobj > **out_syncobj, uint32_t flags, > kref_init(>refcount); > INIT_LIST_HEAD(>cb_list); > spin_lock_init(>lock); > + mutex_init(>mutex); > if (flags & 
DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) > syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE; > else > diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index > 5e8c5c027e09..3d3c8c181bd2 100644 > --- a/include/drm/drm_syncobj.h > +++ b/include/drm/drm_syncobj.h > @@ -78,6 +78,10 @@ struct drm_syncobj { >* @lock: Protects syncobj list and write-locks >*/ > spinlock_t lock; > + /** > + * @mutex: mutex between syncobj cb execution and free. > + */ > + struct mutex mutex; > /** >* @file: A file backing for this syncobj. >*/ > -- > 2.17.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm: fix deadlock of syncobj v2
v2: add a mutex between sync_cb execution and free. Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 11 +++++++++-- include/drm/drm_syncobj.h | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..c025a0b93565 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -158,9 +158,11 @@ void drm_syncobj_add_callback(struct drm_syncobj *syncobj, void drm_syncobj_remove_callback(struct drm_syncobj *syncobj, struct drm_syncobj_cb *cb) { + mutex_lock(&syncobj->mutex); spin_lock(&syncobj->lock); list_del_init(&cb->node); spin_unlock(&syncobj->lock); + mutex_unlock(&syncobj->mutex); } static void drm_syncobj_init(struct drm_syncobj *syncobj) @@ -344,13 +346,17 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + LIST_HEAD(cb_list); + mutex_lock(&syncobj->mutex); spin_lock(&syncobj->lock); - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { + list_splice_init(&syncobj->cb_list, &cb_list); + spin_unlock(&syncobj->lock); + list_for_each_entry_safe(cur, tmp, &cb_list, node) { list_del_init(&cur->node); cur->func(syncobj, cur); } - spin_unlock(&syncobj->lock); + mutex_unlock(&syncobj->mutex); } } EXPORT_SYMBOL(drm_syncobj_replace_fence); @@ -501,6 +507,7 @@ int drm_syncobj_create(struct drm_syncobj **out_syncobj, uint32_t flags, kref_init(&syncobj->refcount); INIT_LIST_HEAD(&syncobj->cb_list); spin_lock_init(&syncobj->lock); + mutex_init(&syncobj->mutex); if (flags & DRM_SYNCOBJ_CREATE_TYPE_TIMELINE) syncobj->type = DRM_SYNCOBJ_TYPE_TIMELINE; else diff --git a/include/drm/drm_syncobj.h b/include/drm/drm_syncobj.h index 5e8c5c027e09..3d3c8c181bd2 100644 --- a/include/drm/drm_syncobj.h +++ b/include/drm/drm_syncobj.h @@ -78,6 +78,10 @@ struct drm_syncobj { /** * @lock: Protects syncobj list and write-locks */ spinlock_t lock; + /** + * @mutex: mutex between syncobj cb execution and free.
+*/ + struct mutex mutex; /** * @file: A file backing for this syncobj. */ -- 2.17.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
Am 19.10.18 um 14:19 schrieb zhoucm1: On 2018年10月19日 20:08, Koenig, Christian wrote: Am 19.10.18 um 14:01 schrieb zhoucm1: On 2018年10月19日 19:26, zhoucm1 wrote: On 2018年10月19日 18:50, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-19 11:26:41) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(_list); LIST_HEAD(cb_list); // does both in one spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); Steal the snapshot of the list under the lock, ok. + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); Races against external caller of drm_syncobj_remove_callback(). However, it looks like that race is just fine, but we don't guard against the struct drm_syncobj_cb itself being freed, leading to all sort of fun for an interrupted drm_syncobj_array_wait_timeout. Thanks quick review, I will use "while (!list_empty()) { e = list_first_entry(); list_del(e)" to avoid deadlock. this still cannot resolve freeing problem, do you mind I change spinlock to mutex? How does that help? What you could do is to merge the array of fences into the beginning of the signal_pt, e.g. something like this: struct drm_syncobj_signal_pt { struct dma_fence fences[2]; struct dma_fence_array *fence_array; u64 value; struct list_head list; }; This way the drm_syncobj_signal_pt is freed when the fence_array is freed. 
That should be sufficient if we correctly reference count the fence_array. I'm not sure what problem you said, the deadlock reason is : Cb func will call drm_syncobj_search_fence, which will need to grab the lock, otherwise deadlock. But when we steal list or use "while (!list_empty()) { e = list_first_entry(); list_del(e)", both will encounter another freeing problem, that is syncobj_cb could be freed when wait timeout. If we change to use mutex, then we can move lock inside of _search_fence out. another way is we add a separate spinlock for signal_pt_list, not share one lock with cb_list. Well of hand that sounds like the cleanest approach to me. Christian. Regards, David Christian. Thanks, David Zhou will send v2 in one minute. Regards, David Zhou That kfree seems to undermine the validity of stealing the list. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
On 2018年10月19日 20:08, Koenig, Christian wrote: Am 19.10.18 um 14:01 schrieb zhoucm1: On 2018年10月19日 19:26, zhoucm1 wrote: On 2018年10月19日 18:50, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-19 11:26:41) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(_list); LIST_HEAD(cb_list); // does both in one spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); Steal the snapshot of the list under the lock, ok. + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); Races against external caller of drm_syncobj_remove_callback(). However, it looks like that race is just fine, but we don't guard against the struct drm_syncobj_cb itself being freed, leading to all sort of fun for an interrupted drm_syncobj_array_wait_timeout. Thanks quick review, I will use "while (!list_empty()) { e = list_first_entry(); list_del(e)" to avoid deadlock. this still cannot resolve freeing problem, do you mind I change spinlock to mutex? How does that help? What you could do is to merge the array of fences into the beginning of the signal_pt, e.g. something like this: struct drm_syncobj_signal_pt { struct dma_fence fences[2]; struct dma_fence_array *fence_array; u64 value; struct list_head list; }; This way the drm_syncobj_signal_pt is freed when the fence_array is freed. That should be sufficient if we correctly reference count the fence_array. 
I'm not sure what problem you said, the deadlock reason is : Cb func will call drm_syncobj_search_fence, which will need to grab the lock, otherwise deadlock. But when we steal list or use "while (!list_empty()) { e = list_first_entry(); list_del(e)", both will encounter another freeing problem, that is syncobj_cb could be freed when wait timeout. If we change to use mutex, then we can move lock inside of _search_fence out. another way is we add a separate spinlock for signal_pt_list, not share one lock with cb_list. Regards, David Christian. Thanks, David Zhou will send v2 in one minute. Regards, David Zhou That kfree seems to undermine the validity of stealing the list. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
Am 19.10.18 um 14:01 schrieb zhoucm1: > > > On 2018年10月19日 19:26, zhoucm1 wrote: >> >> >> On 2018年10月19日 18:50, Chris Wilson wrote: >>> Quoting Chunming Zhou (2018-10-19 11:26:41) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(_list); >>> LIST_HEAD(cb_list); // does both in one >>> spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); >>> Steal the snapshot of the list under the lock, ok. >>> + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); >>> Races against external caller of drm_syncobj_remove_callback(). >>> However, >>> it looks like that race is just fine, but we don't guard against the >>> struct drm_syncobj_cb itself being freed, leading to all sort of fun >>> for >>> an interrupted drm_syncobj_array_wait_timeout. >> Thanks quick review, I will use "while (!list_empty()) { e = >> list_first_entry(); list_del(e)" to avoid deadlock. > this still cannot resolve freeing problem, do you mind I change > spinlock to mutex? How does that help? What you could do is to merge the array of fences into the beginning of the signal_pt, e.g. something like this: struct drm_syncobj_signal_pt { struct dma_fence fences[2]; struct dma_fence_array *fence_array; u64 value; struct list_head list; }; This way the drm_syncobj_signal_pt is freed when the fence_array is freed. That should be sufficient if we correctly reference count the fence_array. 
Christian. > > Thanks, > David Zhou >> >> will send v2 in one minute. >> >> Regards, >> David Zhou >>> >>> That kfree seems to undermine the validity of stealing the list. >>> -Chris >> > ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
On 2018年10月19日 19:26, zhoucm1 wrote: On 2018年10月19日 18:50, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-19 11:26:41) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(_list); LIST_HEAD(cb_list); // does both in one spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); Steal the snapshot of the list under the lock, ok. + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); Races against external caller of drm_syncobj_remove_callback(). However, it looks like that race is just fine, but we don't guard against the struct drm_syncobj_cb itself being freed, leading to all sort of fun for an interrupted drm_syncobj_array_wait_timeout. Thanks quick review, I will use "while (!list_empty()) { e = list_first_entry(); list_del(e)" to avoid deadlock. this still cannot resolve freeing problem, do you mind I change spinlock to mutex? Thanks, David Zhou will send v2 in one minute. Regards, David Zhou That kfree seems to undermine the validity of stealing the list. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
On 2018年10月19日 18:50, Chris Wilson wrote: Quoting Chunming Zhou (2018-10-19 11:26:41) Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(_list); LIST_HEAD(cb_list); // does both in one spin_lock(>lock); - list_for_each_entry_safe(cur, tmp, >cb_list, node) { + list_splice_init(>cb_list, _list); Steal the snapshot of the list under the lock, ok. + spin_unlock(>lock); + list_for_each_entry_safe(cur, tmp, _list, node) { list_del_init(>node); Races against external caller of drm_syncobj_remove_callback(). However, it looks like that race is just fine, but we don't guard against the struct drm_syncobj_cb itself being freed, leading to all sort of fun for an interrupted drm_syncobj_array_wait_timeout. Thanks quick review, I will use "while (!list_empty()) { e = list_first_entry(); list_del(e)" to avoid deadlock. will send v2 in one minute. Regards, David Zhou That kfree seems to undermine the validity of stealing the list. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH] drm: fix deadlock of syncobj
Quoting Chunming Zhou (2018-10-19 11:26:41) > Signed-off-by: Chunming Zhou > Cc: Daniel Vetter > Cc: Chris Wilson > Cc: Christian König > --- > drivers/gpu/drm/drm_syncobj.c | 7 +++++-- > 1 file changed, 5 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c > index 57bf6006394d..2f3c14cb5156 100644 > --- a/drivers/gpu/drm/drm_syncobj.c > +++ b/drivers/gpu/drm/drm_syncobj.c > @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj > *syncobj, > drm_syncobj_create_signal_pt(syncobj, fence, pt_value); > if (fence) { > struct drm_syncobj_cb *cur, *tmp; > + struct list_head cb_list; > + INIT_LIST_HEAD(&cb_list); LIST_HEAD(cb_list); // does both in one > spin_lock(&syncobj->lock); > - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { > + list_splice_init(&syncobj->cb_list, &cb_list); Steal the snapshot of the list under the lock, ok. > + spin_unlock(&syncobj->lock); > + list_for_each_entry_safe(cur, tmp, &cb_list, node) { > list_del_init(&cur->node); Races against external caller of drm_syncobj_remove_callback(). However, it looks like that race is just fine, but we don't guard against the struct drm_syncobj_cb itself being freed, leading to all sort of fun for an interrupted drm_syncobj_array_wait_timeout. That kfree seems to undermine the validity of stealing the list. -Chris ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm: fix deadlock of syncobj
Signed-off-by: Chunming Zhou Cc: Daniel Vetter Cc: Chris Wilson Cc: Christian König --- drivers/gpu/drm/drm_syncobj.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/drm_syncobj.c b/drivers/gpu/drm/drm_syncobj.c index 57bf6006394d..2f3c14cb5156 100644 --- a/drivers/gpu/drm/drm_syncobj.c +++ b/drivers/gpu/drm/drm_syncobj.c @@ -344,13 +344,16 @@ void drm_syncobj_replace_fence(struct drm_syncobj *syncobj, drm_syncobj_create_signal_pt(syncobj, fence, pt_value); if (fence) { struct drm_syncobj_cb *cur, *tmp; + struct list_head cb_list; + INIT_LIST_HEAD(&cb_list); spin_lock(&syncobj->lock); - list_for_each_entry_safe(cur, tmp, &syncobj->cb_list, node) { + list_splice_init(&syncobj->cb_list, &cb_list); + spin_unlock(&syncobj->lock); + list_for_each_entry_safe(cur, tmp, &cb_list, node) { list_del_init(&cur->node); cur->func(syncobj, cur); } - spin_unlock(&syncobj->lock); } } EXPORT_SYMBOL(drm_syncobj_replace_fence); -- 2.17.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel