Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Fri, Feb 07, 2020 at 05:10:40PM +0100, Daniel Vetter wrote: > On Wed, Jan 29, 2020 at 07:54:52PM +, Chris Wilson wrote: > > On Braswell and Broxton (also known as Valleyview and Apollolake), we > > need to serialise updates of the GGTT using the big stop_machine() > > hammer. This has the side effect of appearing to lockdep as a possible > > reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu > > allocations). However, we want to use vm->mutex (including ggtt->mutex) > > from wthin the shrinker and so must avoid such possible taints. For this > > purpose, we introduced the asynchronous vma binding and we can apply it > > to the PIN_GLOBAL so long as take care to add the necessary waits for > > the worker afterwards. > > > > Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 > > Signed-off-by: Chris Wilson > > Hm, isn't that just the usual "lockdep cant see past a wait on another > thread" trick that doesn't actually fix anything? The locking context for > the pte writes and the wait seem exactly the same, at first glance at > least. > > I dont' really have a different idea though :-/ 2nd one I just realized: dma_fence can't be blocked on anything that might go into reclaim, so doing this with a dma_fence also doesn't really work. -Daniel > > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++--- > > drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 + > > drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- > > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- > > drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--- > > drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- > > drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 ++-- > > drivers/gpu/drm/i915/i915_gem.c | 6 + > > drivers/gpu/drm/i915/i915_vma.c | 27 ++- > > drivers/gpu/drm/i915/i915_vma.h | 2 ++ > > 10 files changed, 46 insertions(+), 15 deletions(-) > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > index 39fe9a5b4820..2504f4d05edf 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > > @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs > > *engine, > > { > > unsigned int flags; > > > > - flags = PIN_GLOBAL; > > if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) > > /* > > * On g33, we cannot place HWS above 256MiB, so > > @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct > > intel_engine_cs *engine, > > * above the mappable region (even though we never > > * actually map it). > > */ > > - flags |= PIN_MAPPABLE; > > + flags = PIN_MAPPABLE; > > else > > - flags |= PIN_HIGH; > > + flags = PIN_HIGH; > > > > - return i915_vma_pin(vma, 0, 0, flags); > > + return i915_ggtt_pin(vma, 0, flags); > > } > > > > static int init_status_page(struct intel_engine_cs *engine) > > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c > > b/drivers/gpu/drm/i915/gt/intel_ggtt.c > > index 79096722ce16..6af50d62d712 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c > > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c > > @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) > > ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; > > if (ggtt->vm.clear_range != nop_clear_range) > > ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; > > + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; > > } > > > > ggtt->invalidate = gen8_ggtt_invalidate; > > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c > > b/drivers/gpu/drm/i915/gt/intel_gt.c > > index 143268083135..bf6c0f949e35 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > > @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, > > unsigned int size) > > goto err_unref; > > } > > > > - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); > > + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); > > if (ret) > > goto err_unref; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > > b/drivers/gpu/drm/i915/gt/intel_lrc.c > > index 8f15ab7d8d88..e63ae4a17110 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > > @@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs > > *engine) > > goto err; > > } > > > > - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); > > + err = i915_ggtt_pin(vma, 0, PIN_HIGH); > > if (err) > > goto err; > > > > diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c > > b/drivers/gpu/drm/i915/gt/intel_ring.c > > index 374b28f13ca0..366013367526 100644 > > --- a/drivers/gpu/drm/i915/gt/intel_ring.c > > +++ b/dr
Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Wed, Jan 29, 2020 at 07:54:52PM +, Chris Wilson wrote: > On Braswell and Broxton (also known as Valleyview and Apollolake), we > need to serialise updates of the GGTT using the big stop_machine() > hammer. This has the side effect of appearing to lockdep as a possible > reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu > allocations). However, we want to use vm->mutex (including ggtt->mutex) > from wthin the shrinker and so must avoid such possible taints. For this > purpose, we introduced the asynchronous vma binding and we can apply it > to the PIN_GLOBAL so long as take care to add the necessary waits for > the worker afterwards. > > Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 > Signed-off-by: Chris Wilson Hm, isn't that just the usual "lockdep cant see past a wait on another thread" trick that doesn't actually fix anything? The locking context for the pte writes and the wait seem exactly the same, at first glance at least. I dont' really have a different idea though :-/ -Daniel > --- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++--- > drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 + > drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--- > drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- > drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 ++-- > drivers/gpu/drm/i915/i915_gem.c | 6 + > drivers/gpu/drm/i915/i915_vma.c | 27 ++- > drivers/gpu/drm/i915/i915_vma.h | 2 ++ > 10 files changed, 46 insertions(+), 15 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > index 39fe9a5b4820..2504f4d05edf 100644 > --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c > +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c > @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs > *engine, > { > unsigned int flags; > > - flags = PIN_GLOBAL; > if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) > /* >* On g33, we cannot place HWS above 256MiB, so > @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs > *engine, >* above the mappable region (even though we never >* actually map it). >*/ > - flags |= PIN_MAPPABLE; > + flags = PIN_MAPPABLE; > else > - flags |= PIN_HIGH; > + flags = PIN_HIGH; > > - return i915_vma_pin(vma, 0, 0, flags); > + return i915_ggtt_pin(vma, 0, flags); > } > > static int init_status_page(struct intel_engine_cs *engine) > diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c > b/drivers/gpu/drm/i915/gt/intel_ggtt.c > index 79096722ce16..6af50d62d712 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c > +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c > @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) > ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; > if (ggtt->vm.clear_range != nop_clear_range) > ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; > + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; > } > > ggtt->invalidate = gen8_ggtt_invalidate; > diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c > b/drivers/gpu/drm/i915/gt/intel_gt.c > index 143268083135..bf6c0f949e35 100644 > --- a/drivers/gpu/drm/i915/gt/intel_gt.c > +++ b/drivers/gpu/drm/i915/gt/intel_gt.c > @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, > unsigned int size) > goto err_unref; > } > > - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); > + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); > if (ret) > goto err_unref; > > diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c > b/drivers/gpu/drm/i915/gt/intel_lrc.c > index 8f15ab7d8d88..e63ae4a17110 100644 > --- a/drivers/gpu/drm/i915/gt/intel_lrc.c > +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c > @@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs > *engine) > goto err; > } > > - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); > + err = i915_ggtt_pin(vma, 0, PIN_HIGH); > if (err) > goto err; > > diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c > b/drivers/gpu/drm/i915/gt/intel_ring.c > index 374b28f13ca0..366013367526 100644 > --- a/drivers/gpu/drm/i915/gt/intel_ring.c > +++ b/drivers/gpu/drm/i915/gt/intel_ring.c > @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) > if (atomic_fetch_inc(&ring->pin_count)) > return 0; > > - flags = PIN_GLOBAL; > - > /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ > - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); > + flags = PI
Re: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
>-Original Message- >From: Intel-gfx On Behalf Of Chris >Wilson >Sent: Thursday, January 30, 2020 10:21 AM >To: intel-gfx@lists.freedesktop.org >Subject: [Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim >tainting the ggtt->mutex > >On Braswell and Broxton (also known as Valleyview and Apollolake), we >need to serialise updates of the GGTT using the big stop_machine() >hammer. This has the side effect of appearing to lockdep as a possible >reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu >allocations). However, we want to use vm->mutex (including ggtt->mutex) >from wthin the shrinker and so must avoid such possible taints. For this s/wthin/within m >purpose, we introduced the asynchronous vma binding and we can apply it >to the PIN_GLOBAL so long as take care to add the necessary waits for >the worker afterwards. > >Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 >Signed-off-by: Chris Wilson >--- > drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++--- > drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +- > drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- > drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- > drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++-- > drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +-- > drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 +-- > drivers/gpu/drm/i915/i915_active.c| 10 -- > drivers/gpu/drm/i915/i915_active.h| 3 +- > drivers/gpu/drm/i915/i915_gem.c | 6 > drivers/gpu/drm/i915/i915_vma.c | 38 +-- > drivers/gpu/drm/i915/i915_vma.h | 2 ++ > 12 files changed, 66 insertions(+), 21 deletions(-) > >diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c >b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >index decb63462410..86af5edd6933 100644 >--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c >+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c >@@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs >*engine, > { > unsigned int flags; > >- flags = PIN_GLOBAL; > if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt- >>ggtt)) > /* >* On g33, we cannot place HWS above 256MiB, so >@@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct >intel_engine_cs *engine, >* above the mappable region (even though we never >* actually map it). >*/ >- flags |= PIN_MAPPABLE; >+ flags = PIN_MAPPABLE; > else >- flags |= PIN_HIGH; >+ flags = PIN_HIGH; > >- return i915_vma_pin(vma, 0, 0, flags); >+ return i915_ggtt_pin(vma, 0, flags); > } > > static int init_status_page(struct intel_engine_cs *engine) >diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c >b/drivers/gpu/drm/i915/gt/intel_ggtt.c >index f624fc5c19c3..d9fd25480a46 100644 >--- a/drivers/gpu/drm/i915/gt/intel_ggtt.c >+++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c >@@ -109,7 +109,7 @@ static void ggtt_suspend_mappings(struct i915_ggtt >*ggtt) > struct i915_vma *vma; > > list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) >- i915_vma_sync(vma); >+ i915_vma_wait_for_bind(vma); > > ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); > ggtt->invalidate(ggtt); >@@ -851,6 +851,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) > IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { > ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; > ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; >+ ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; > } > > ggtt->invalidate = gen8_ggtt_invalidate; >diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c >b/drivers/gpu/drm/i915/gt/intel_gt.c >index 51019611bc1e..f1f1b306e0af 100644 >--- a/drivers/gpu/drm/i915/gt/intel_gt.c >+++ b/drivers/gpu/drm/i915/gt/intel_gt.c >@@ -344,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, >unsigned int size) > goto err_unref; > } > >- ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); >+ ret = i915_ggtt_pin(vma, 0, PIN_HIGH); > if (ret) > goto err_unref; > >diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c >b/drivers/gpu/drm/i915/gt/intel_lrc.c >index eb83c87c8b4e..fc0a72cc54fd 100644 >--- a/drivers/gpu/drm/i915/gt/intel_lrc.c >+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c >@@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs >*engine) >
[Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Braswell and Broxton (also known as Valleyview and Apollolake), we need to serialise updates of the GGTT using the big stop_machine() hammer. This has the side effect of appearing to lockdep as a possible reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu allocations). However, we want to use vm->mutex (including ggtt->mutex) from wthin the shrinker and so must avoid such possible taints. For this purpose, we introduced the asynchronous vma binding and we can apply it to the PIN_GLOBAL so long as take care to add the necessary waits for the worker afterwards. Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++--- drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +- drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++-- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +-- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 +-- drivers/gpu/drm/i915/i915_active.c| 10 -- drivers/gpu/drm/i915/i915_active.h| 3 +- drivers/gpu/drm/i915/i915_gem.c | 6 drivers/gpu/drm/i915/i915_vma.c | 38 +-- drivers/gpu/drm/i915/i915_vma.h | 2 ++ 12 files changed, 66 insertions(+), 21 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index decb63462410..86af5edd6933 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, { unsigned int flags; - flags = PIN_GLOBAL; if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, * above the mappable region (even though we never * actually map it). */ - flags |= PIN_MAPPABLE; + flags = PIN_MAPPABLE; else - flags |= PIN_HIGH; + flags = PIN_HIGH; - return i915_vma_pin(vma, 0, 0, flags); + return i915_ggtt_pin(vma, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index f624fc5c19c3..d9fd25480a46 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -109,7 +109,7 @@ static void ggtt_suspend_mappings(struct i915_ggtt *ggtt) struct i915_vma *vma; list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) - i915_vma_sync(vma); + i915_vma_wait_for_bind(vma); ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); ggtt->invalidate(ggtt); @@ -851,6 +851,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 51019611bc1e..f1f1b306e0af 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -344,7 +344,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); if (ret) goto err_unref; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eb83c87c8b4e..fc0a72cc54fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..366013367526 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - flags = PIN_GLOBAL; - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + flags = PIN_OFFSET_BIAS | i915_ggtt_p
[Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Braswell and Broxton (also known as Valleyview and Apollolake), we need to serialise updates of the GGTT using the big stop_machine() hammer. This has the side effect of appearing to lockdep as a possible reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu allocations). However, we want to use vm->mutex (including ggtt->mutex) from wthin the shrinker and so must avoid such possible taints. For this purpose, we introduced the asynchronous vma binding and we can apply it to the PIN_GLOBAL so long as take care to add the necessary waits for the worker afterwards. Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 ++--- drivers/gpu/drm/i915/gt/intel_ggtt.c | 5 +-- drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++-- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 +-- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 +-- drivers/gpu/drm/i915/i915_active.c| 10 -- drivers/gpu/drm/i915/i915_active.h| 3 +- drivers/gpu/drm/i915/i915_gem.c | 6 drivers/gpu/drm/i915/i915_vma.c | 38 +-- drivers/gpu/drm/i915/i915_vma.h | 2 ++ 12 files changed, 65 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 8707d7264490..449bf7770300 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, { unsigned int flags; - flags = PIN_GLOBAL; if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, * above the mappable region (even though we never * actually map it). */ - flags |= PIN_MAPPABLE; + flags = PIN_MAPPABLE; else - flags |= PIN_HIGH; + flags = PIN_HIGH; - return i915_vma_pin(vma, 0, 0, flags); + return i915_ggtt_pin(vma, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 81a63f7bc6c4..3106424d17d7 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -854,6 +854,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) IS_CHERRYVIEW(i915) /* fails with concurrent use/update */) { ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; @@ -1161,8 +1162,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) intel_gt_check_and_clear_faults(ggtt->vm.gt); - mutex_lock(&ggtt->vm.mutex); - /* First fill our portion of the GTT with scratch pages */ ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total); @@ -1189,8 +1188,6 @@ static void ggtt_restore_mappings(struct i915_ggtt *ggtt) atomic_set(&ggtt->vm.open, open); ggtt->invalidate(ggtt); - mutex_unlock(&ggtt->vm.mutex); - if (flush) wbinvd_on_all_cpus(); } diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 143268083135..bf6c0f949e35 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); if (ret) goto err_unref; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index eb83c87c8b4e..fc0a72cc54fd 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3268,7 +3268,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..366013367526 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) retu
[Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Braswell and Broxton (also known as Valleyview and Apollolake), we need to serialise updates of the GGTT using the big stop_machine() hammer. This has the side effect of appearing to lockdep as a possible reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu allocations). However, we want to use vm->mutex (including ggtt->mutex) from wthin the shrinker and so must avoid such possible taints. For this purpose, we introduced the asynchronous vma binding and we can apply it to the PIN_GLOBAL so long as take care to add the necessary waits for the worker afterwards. Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++--- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 + drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 6 + drivers/gpu/drm/i915/i915_vma.c | 27 ++- drivers/gpu/drm/i915/i915_vma.h | 2 ++ 10 files changed, 46 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 39fe9a5b4820..2504f4d05edf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, { unsigned int flags; - flags = PIN_GLOBAL; if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, * above the mappable region (even though we never * actually map it). */ - flags |= PIN_MAPPABLE; + flags = PIN_MAPPABLE; else - flags |= PIN_HIGH; + flags = PIN_HIGH; - return i915_vma_pin(vma, 0, 0, flags); + return i915_ggtt_pin(vma, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 79096722ce16..6af50d62d712 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; if (ggtt->vm.clear_range != nop_clear_range) ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 143268083135..bf6c0f949e35 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); if (ret) goto err_unref; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 9a0d0282f3ca..5906fc7df2a4 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3263,7 +3263,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..366013367526 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - flags = PIN_GLOBAL; - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); if (vma->obj->stolen) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); + ret = i915_ggtt_pin(vma, 0, flags); if (unlikely(ret)) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 87716529cd2f..465f87b65901 100644 --- a/drivers/gpu/drm/i915/gt/intel_ti
[Intel-gfx] [PATCH] drm/i915: Use the async worker to avoid reclaim tainting the ggtt->mutex
On Braswell and Broxton (also known as Valleyview and Apollolake), we need to serialise updates of the GGTT using the big stop_machine() hammer. This has the side effect of appearing to lockdep as a possible reclaim (since it uses the cpuhp mutex and that is tainted by per-cpu allocations). However, we want to use vm->mutex (including ggtt->mutex) from wthin the shrinker and so must avoid such possible taints. For this purpose, we introduced the asynchronous vma binding and we can apply it to the PIN_GLOBAL so long as take care to add the necessary waits for the worker afterwards. Closes: https://gitlab.freedesktop.org/drm/intel/issues/211 Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/gt/intel_engine_cs.c | 7 +++--- drivers/gpu/drm/i915/gt/intel_ggtt.c | 1 + drivers/gpu/drm/i915/gt/intel_gt.c| 2 +- drivers/gpu/drm/i915/gt/intel_lrc.c | 2 +- drivers/gpu/drm/i915/gt/intel_ring.c | 6 ++--- drivers/gpu/drm/i915/gt/intel_timeline.c | 4 ++-- drivers/gpu/drm/i915/gt/uc/intel_guc.c| 4 ++-- drivers/gpu/drm/i915/i915_gem.c | 6 + drivers/gpu/drm/i915/i915_vma.c | 27 ++- drivers/gpu/drm/i915/i915_vma.h | 2 ++ 10 files changed, 46 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 39fe9a5b4820..2504f4d05edf 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -527,7 +527,6 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, { unsigned int flags; - flags = PIN_GLOBAL; if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) /* * On g33, we cannot place HWS above 256MiB, so @@ -540,11 +539,11 @@ static int pin_ggtt_status_page(struct intel_engine_cs *engine, * above the mappable region (even though we never * actually map it). */ - flags |= PIN_MAPPABLE; + flags = PIN_MAPPABLE; else - flags |= PIN_HIGH; + flags = PIN_HIGH; - return i915_vma_pin(vma, 0, 0, flags); + return i915_ggtt_pin(vma, 0, flags); } static int init_status_page(struct intel_engine_cs *engine) diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index 79096722ce16..6af50d62d712 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -881,6 +881,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.insert_page= bxt_vtd_ggtt_insert_page__BKL; if (ggtt->vm.clear_range != nop_clear_range) ggtt->vm.clear_range = bxt_vtd_ggtt_clear_range__BKL; + ggtt->vm.bind_async_flags = I915_VMA_GLOBAL_BIND; } ggtt->invalidate = gen8_ggtt_invalidate; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 143268083135..bf6c0f949e35 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -345,7 +345,7 @@ static int intel_gt_init_scratch(struct intel_gt *gt, unsigned int size) goto err_unref; } - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + ret = i915_ggtt_pin(vma, 0, PIN_HIGH); if (ret) goto err_unref; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 8f15ab7d8d88..e63ae4a17110 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -3259,7 +3259,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) goto err; } - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH); + err = i915_ggtt_pin(vma, 0, PIN_HIGH); if (err) goto err; diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 374b28f13ca0..366013367526 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -31,17 +31,15 @@ int intel_ring_pin(struct intel_ring *ring) if (atomic_fetch_inc(&ring->pin_count)) return 0; - flags = PIN_GLOBAL; - /* Ring wraparound at offset 0 sometimes hangs. No idea why. */ - flags |= PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); + flags = PIN_OFFSET_BIAS | i915_ggtt_pin_bias(vma); if (vma->obj->stolen) flags |= PIN_MAPPABLE; else flags |= PIN_HIGH; - ret = i915_vma_pin(vma, 0, 0, flags); + ret = i915_ggtt_pin(vma, 0, flags); if (unlikely(ret)) goto err_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 87716529cd2f..465f87b65901 100644 --- a/drivers/gpu/drm/i915/gt/intel_ti