Re: [Intel-gfx] [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.

2019-01-17 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-01-17 16:23:48)
> 
> On 16/01/2019 17:01, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2019-01-16 16:47:43)
> >>
> >> On 07/01/2019 11:54, Chris Wilson wrote:
> >>> @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, 
> >>> void *data,
> >>>
> >>>static void i915_gem_object_bump_inactive_ggtt(struct 
> >>> drm_i915_gem_object *obj)
> >>>{
> >>> - struct drm_i915_private *i915;
> >>> + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >>>struct list_head *list;
> >>>struct i915_vma *vma;
> >>>
> >>>GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >>>
> >>> + mutex_lock(&i915->ggtt.vm.mutex);
> >>>for_each_ggtt_vma(vma, obj) {
> >>>if (!drm_mm_node_allocated(&vma->node))
> >>>continue;
> >>>
> >>>list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >>>}
> >>> + mutex_unlock(&i915->ggtt.vm.mutex);
> >>
> >> This is now struct_mutex -> vm->mutex nesting, which we would preferably
> >> want to avoid? There are only two callers for the function.
> >>
> >> It looks we could remove nesting from i915_gem_set_domain_ioctl by just
> >> moving the call to after mutex unlock.
> >>
> >> i915_gem_object_unpin_from_display_plane callers are not as easy so
> >> maybe at least do the one above?
> > 
> > unpin_from_display_plane is the goal here tbh.
> > 
> >>> - i915 = to_i915(obj->base.dev);
> >>>spin_lock(&i915->mm.obj_lock);
> >>>list = obj->bind_count ? &i915->mm.bound_list : 
> >>> &i915->mm.unbound_list;
> >>>list_move_tail(&obj->mm.link, list);
> >>> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
> >>> b/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> index a76f65fe86be..4a0c6830659d 100644
> >>> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> >>> @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >>>}
> >>>
> >>>INIT_LIST_HEAD(&eviction_list);
> >>> + mutex_lock(&vm->mutex);
> >>>list_for_each_entry(vma, &vm->bound_list, vm_link) {
> >>>if (i915_vma_is_pinned(vma))
> >>>continue;
> >>> @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >>>__i915_vma_pin(vma);
> >>>list_add(&vma->evict_link, &eviction_list);
> >>>}
> >>> + mutex_unlock(&vm->mutex);
> >>
> >> This is another nesting so I suppose you leave all this fun for later?

Yes, I remember some more of the fun that made me defer this task -- and
that was the random waits we could hit, requiring the GPU reset dilemma
be resolved (i.e. reworking reset to avoid taking any of these locks,
which also prevents us from hitting these from the shrinker).

> > Yes, the intent was to put the locks in place (gradually) then peel back
> > struct_mutex (gradually).
> > 
> >>> @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
> >>>
> >>>vma->ops->clear_pages(vma);
> >>>
> >>> + mutex_lock(&vma->vm->mutex);
> >>>drm_mm_remove_node(&vma->node);
> >>
> >> This is by design also protected by the vm->mutex? But insertion is not
> >> AFAICT.
> > 
> > Not yet. Can you guess which bit proved tricky? ;) Getting the right
> > point to lock for execbuf, and eviction. At the same time over there is
> > the fuss with ww_mutex, as well as contexts et al, and it all gets
> > confusing quickly.
> > 
> > ...(tries to remember why this patch is actually here; this set was
> > picked so that I could do obj->vma_list without struct_mutex (which
> > was used for timeline allocation) and I pulled in anything required to
> > resolve conflicts, but why this one)...
> 
> Have you figured it out in the meantime?

The patch does as it says and protects the vma->vm_link/vm->*_list. You
start to look at trying to decide if i915_vma_pin() does atomic magic or
if it should require the caller lock(vm->mutex), and I quickly descend
into wanting to do the domain+fence management of
ww_mutex_lock(&obj->resv.lock) instead.

Bah, make the caller take vm->mutex and then we can see if that is
better than atomic magic later.
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.

2019-01-17 Thread Tvrtko Ursulin


On 16/01/2019 17:01, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2019-01-16 16:47:43)


On 07/01/2019 11:54, Chris Wilson wrote:

@@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void 
*data,
   
   static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)

   {
- struct drm_i915_private *i915;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
   struct list_head *list;
   struct i915_vma *vma;
   
   GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
   
+ mutex_lock(&i915->ggtt.vm.mutex);

   for_each_ggtt_vma(vma, obj) {
   if (!drm_mm_node_allocated(&vma->node))
   continue;
   
   list_move_tail(&vma->vm_link, &vma->vm->bound_list);

   }
+ mutex_unlock(&i915->ggtt.vm.mutex);


This is now struct_mutex -> vm->mutex nesting, which we would preferably
want to avoid? There are only two callers for the function.

It looks we could remove nesting from i915_gem_set_domain_ioctl by just
moving the call to after mutex unlock.

i915_gem_object_unpin_from_display_plane callers are not as easy so
maybe at least do the one above?


unpin_from_display_plane is the goal here tbh.


- i915 = to_i915(obj->base.dev);
   spin_lock(&i915->mm.obj_lock);
   list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
   list_move_tail(&obj->mm.link, list);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index a76f65fe86be..4a0c6830659d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
   }
   
   INIT_LIST_HEAD(&eviction_list);

+ mutex_lock(&vm->mutex);
   list_for_each_entry(vma, &vm->bound_list, vm_link) {
   if (i915_vma_is_pinned(vma))
   continue;
@@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
   __i915_vma_pin(vma);
   list_add(&vma->evict_link, &eviction_list);
   }
+ mutex_unlock(&vm->mutex);


This is another nesting so I suppose you leave all this fun for later?


Yes, the intent was to put the locks in place (gradually) then peel back
struct_mutex (gradually).


@@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
   
   vma->ops->clear_pages(vma);
   
+ mutex_lock(>vm->mutex);

   drm_mm_remove_node(>node);


This is by design also protected by the vm->mutex? But insertion is not
AFAICT.


Not yet. Can you guess which bit proved tricky? ;) Getting the right
point to lock for execbuf, and eviction. At the same time over there is
the fuss with ww_mutex, as well as contexts et al, and it all gets
confusing quickly.

...(tries to remember why this patch is actually here; this set was
picked so that I could do obj->vma_list without struct_mutex (which
was used for timeline allocation) and I pulled in anything required to
resolve conflicts, but why this one)...


Have you figured it out in the meantime?

Regards,

Tvrtko
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.

2019-01-16 Thread Chris Wilson
Quoting Tvrtko Ursulin (2019-01-16 16:47:43)
> 
> On 07/01/2019 11:54, Chris Wilson wrote:
> > @@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void 
> > *data,
> >   
> >   static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object 
> > *obj)
> >   {
> > - struct drm_i915_private *i915;
> > + struct drm_i915_private *i915 = to_i915(obj->base.dev);
> >   struct list_head *list;
> >   struct i915_vma *vma;
> >   
> >   GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
> >   
> > + mutex_lock(&i915->ggtt.vm.mutex);
> >   for_each_ggtt_vma(vma, obj) {
> >   if (!drm_mm_node_allocated(&vma->node))
> >   continue;
> >   
> >   list_move_tail(&vma->vm_link, &vma->vm->bound_list);
> >   }
> > + mutex_unlock(&i915->ggtt.vm.mutex);
> 
> This is now struct_mutex -> vm->mutex nesting, which we would preferably 
> want to avoid? There are only two callers for the function.
> 
> It looks we could remove nesting from i915_gem_set_domain_ioctl by just 
> moving the call to after mutex unlock.
> 
> i915_gem_object_unpin_from_display_plane callers are not as easy so 
> maybe at least do the one above?

unpin_from_display_plane is the goal here tbh.

> > - i915 = to_i915(obj->base.dev);
> >   spin_lock(&i915->mm.obj_lock);
> >   list = obj->bind_count ? &i915->mm.bound_list : 
> > &i915->mm.unbound_list;
> >   list_move_tail(&obj->mm.link, list);
> > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
> > b/drivers/gpu/drm/i915/i915_gem_evict.c
> > index a76f65fe86be..4a0c6830659d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >   }
> >   
> >   INIT_LIST_HEAD(&eviction_list);
> > + mutex_lock(&vm->mutex);
> >   list_for_each_entry(vma, &vm->bound_list, vm_link) {
> >   if (i915_vma_is_pinned(vma))
> >   continue;
> > @@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
> >   __i915_vma_pin(vma);
> >   list_add(&vma->evict_link, &eviction_list);
> >   }
> > + mutex_unlock(&vm->mutex);
> 
> This is another nesting so I suppose you leave all this fun for later?

Yes, the intent was to put the locks in place (gradually) then peel back
struct_mutex (gradually).

> > @@ -689,8 +694,10 @@ i915_vma_remove(struct i915_vma *vma)
> >   
> >   vma->ops->clear_pages(vma);
> >   
> > + mutex_lock(&vma->vm->mutex);
> >   drm_mm_remove_node(&vma->node);
> 
> This is by design also protected by the vm->mutex? But insertion is not 
> AFAICT.

Not yet. Can you guess which bit proved tricky? ;) Getting the right
point to lock for execbuf, and eviction. At the same time over there is
the fuss with ww_mutex, as well as contexts et al, and it all gets
confusing quickly.

...(tries to remember why this patch is actually here; this set was
picked so that I could do obj->vma_list without struct_mutex (which
was used for timeline allocation) and I pulled in anything required to
resolve conflicts, but why this one)...
-Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.

2019-01-16 Thread Tvrtko Ursulin


On 07/01/2019 11:54, Chris Wilson wrote:

A starting point to counter the pervasive struct_mutex. For the goal of
avoiding (or at least blocking under them!) global locks during user
request submission, a simple but important step is being able to manage
each client's GTT separately. For which, we want to replace using the
struct_mutex as the guard for all things GTT/VM and switch instead to a
specific mutex inside i915_address_space.

Signed-off-by: Chris Wilson 
---
  drivers/gpu/drm/i915/i915_gem.c | 14 --
  drivers/gpu/drm/i915/i915_gem_evict.c   |  2 ++
  drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +--
  drivers/gpu/drm/i915/i915_gem_shrinker.c|  4 
  drivers/gpu/drm/i915/i915_gem_stolen.c  |  2 ++
  drivers/gpu/drm/i915/i915_vma.c | 11 +++
  drivers/gpu/drm/i915/selftests/i915_gem_evict.c |  3 +++
  drivers/gpu/drm/i915/selftests/i915_gem_gtt.c   |  3 +++
  8 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ed44aeee583..5141a8ba4836 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -246,18 +246,19 @@ int
  i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
  {
-   struct drm_i915_private *dev_priv = to_i915(dev);
-   struct i915_ggtt *ggtt = &dev_priv->ggtt;
+   struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
  
+	mutex_lock(&ggtt->vm.mutex);

+
pinned = ggtt->vm.reserved;
-   mutex_lock(&dev->struct_mutex);
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
-   mutex_unlock(&dev->struct_mutex);
+
+   mutex_unlock(&ggtt->vm.mutex);
  
  	args->aper_size = ggtt->vm.total;

args->aper_available_size = args->aper_size - pinned;
@@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void 
*data,
  
  static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)

  {
-   struct drm_i915_private *i915;
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct list_head *list;
struct i915_vma *vma;
  
  	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
  
+	mutex_lock(&i915->ggtt.vm.mutex);

for_each_ggtt_vma(vma, obj) {
if (!drm_mm_node_allocated(&vma->node))
continue;
  
		list_move_tail(&vma->vm_link, &vma->vm->bound_list);

}
+   mutex_unlock(&i915->ggtt.vm.mutex);


This is now struct_mutex -> vm->mutex nesting, which we would preferably 
want to avoid? There are only two callers for the function.


It looks we could remove nesting from i915_gem_set_domain_ioctl by just 
moving the call to after mutex unlock.


i915_gem_object_unpin_from_display_plane callers are not as easy so 
maybe at least do the one above?


  
-	i915 = to_i915(obj->base.dev);

spin_lock(&i915->mm.obj_lock);
list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
list_move_tail(&obj->mm.link, list);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index a76f65fe86be..4a0c6830659d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
}
  
	INIT_LIST_HEAD(&eviction_list);

+   mutex_lock(&vm->mutex);
list_for_each_entry(vma, &vm->bound_list, vm_link) {
if (i915_vma_is_pinned(vma))
continue;
@@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
}
+   mutex_unlock(&vm->mutex);


This is another nesting so I suppose you leave all this fun for later?

  
  	ret = 0;

list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad4ef8980b97..c3363a9b586b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1932,7 +1932,10 @@ static struct i915_vma *pd_vma_create(struct 
gen6_hw_ppgtt *ppgtt, int size)
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
  
  	INIT_LIST_HEAD(&vma->obj_link);

+
+   mutex_lock(&vma->vm->mutex);
list_add(&vma->vm_link, &vma->vm->unbound_list);
+   mutex_unlock(&vma->vm->mutex);
  
  	return vma;

  }
@@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct 
drm_i915_private *dev_priv)
  
  	i915_check_and_clear_faults(dev_priv);
  
+	mutex_lock(&ggtt->vm.mutex);

+
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
ggtt->vm.closed = true; /* skip rewriting PTE 

[Intel-gfx] [PATCH 32/46] drm/i915: Pull VM lists under the VM mutex.

2019-01-07 Thread Chris Wilson
A starting point to counter the pervasive struct_mutex. For the goal of
avoiding (or at least blocking under them!) global locks during user
request submission, a simple but important step is being able to manage
each client's GTT separately. For which, we want to replace using the
struct_mutex as the guard for all things GTT/VM and switch instead to a
specific mutex inside i915_address_space.

Signed-off-by: Chris Wilson 
---
 drivers/gpu/drm/i915/i915_gem.c | 14 --
 drivers/gpu/drm/i915/i915_gem_evict.c   |  2 ++
 drivers/gpu/drm/i915/i915_gem_gtt.c | 15 +--
 drivers/gpu/drm/i915/i915_gem_shrinker.c|  4 
 drivers/gpu/drm/i915/i915_gem_stolen.c  |  2 ++
 drivers/gpu/drm/i915/i915_vma.c | 11 +++
 drivers/gpu/drm/i915/selftests/i915_gem_evict.c |  3 +++
 drivers/gpu/drm/i915/selftests/i915_gem_gtt.c   |  3 +++
 8 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ed44aeee583..5141a8ba4836 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -246,18 +246,19 @@ int
 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
 {
-   struct drm_i915_private *dev_priv = to_i915(dev);
-   struct i915_ggtt *ggtt = &dev_priv->ggtt;
+   struct i915_ggtt *ggtt = &to_i915(dev)->ggtt;
struct drm_i915_gem_get_aperture *args = data;
struct i915_vma *vma;
u64 pinned;
 
+   mutex_lock(&ggtt->vm.mutex);
+
pinned = ggtt->vm.reserved;
-   mutex_lock(&dev->struct_mutex);
list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link)
if (i915_vma_is_pinned(vma))
pinned += vma->node.size;
-   mutex_unlock(&dev->struct_mutex);
+
+   mutex_unlock(&ggtt->vm.mutex);
 
args->aper_size = ggtt->vm.total;
args->aper_available_size = args->aper_size - pinned;
@@ -1530,20 +1531,21 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void 
*data,
 
 static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj)
 {
-   struct drm_i915_private *i915;
+   struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct list_head *list;
struct i915_vma *vma;
 
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
+   mutex_lock(&i915->ggtt.vm.mutex);
for_each_ggtt_vma(vma, obj) {
if (!drm_mm_node_allocated(&vma->node))
continue;
 
list_move_tail(&vma->vm_link, &vma->vm->bound_list);
}
+   mutex_unlock(&i915->ggtt.vm.mutex);
 
-   i915 = to_i915(obj->base.dev);
spin_lock(&i915->mm.obj_lock);
list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list;
list_move_tail(&obj->mm.link, list);
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c 
b/drivers/gpu/drm/i915/i915_gem_evict.c
index a76f65fe86be..4a0c6830659d 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -433,6 +433,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
}
 
INIT_LIST_HEAD(&eviction_list);
+   mutex_lock(&vm->mutex);
list_for_each_entry(vma, &vm->bound_list, vm_link) {
if (i915_vma_is_pinned(vma))
continue;
@@ -440,6 +441,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm)
__i915_vma_pin(vma);
list_add(&vma->evict_link, &eviction_list);
}
+   mutex_unlock(&vm->mutex);
 
ret = 0;
list_for_each_entry_safe(vma, next, &eviction_list, evict_link) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ad4ef8980b97..c3363a9b586b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1932,7 +1932,10 @@ static struct i915_vma *pd_vma_create(struct 
gen6_hw_ppgtt *ppgtt, int size)
vma->ggtt_view.type = I915_GGTT_VIEW_ROTATED; /* prevent fencing */
 
INIT_LIST_HEAD(&vma->obj_link);
+
+   mutex_lock(&vma->vm->mutex);
list_add(&vma->vm_link, &vma->vm->unbound_list);
+   mutex_unlock(&vma->vm->mutex);
 
return vma;
 }
@@ -3504,9 +3507,10 @@ void i915_gem_restore_gtt_mappings(struct 
drm_i915_private *dev_priv)
 
i915_check_and_clear_faults(dev_priv);
 
+   mutex_lock(&ggtt->vm.mutex);
+
/* First fill our portion of the GTT with scratch pages */
ggtt->vm.clear_range(&ggtt->vm, 0, ggtt->vm.total);
-
ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */
 
/* clflush objects bound into the GGTT and rebind them. */
@@ -3516,19 +3520,26 @@ void i915_gem_restore_gtt_mappings(struct 
drm_i915_private *dev_priv)
if (!(vma->flags & I915_VMA_GLOBAL_BIND))
continue;
 
+   mutex_unlock(&ggtt->vm.mutex);
+
if (!i915_vma_unbind(vma))
-   continue;
+   goto lock;