Re: [Intel-gfx] [PATCH 47/64] drm/i915: Be more careful when unbinding vma
On 12/07/16 17:42, Chris Wilson wrote: On Tue, Jul 12, 2016 at 04:04:57PM +0100, Tvrtko Ursulin wrote: @@ -3463,11 +3483,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the +* obj->vma_list (due to side-effects from retiring +* an active vma), play safe and restart the iterator. +*/ + goto restart; } Does not look efficient for long lists but I don't see a solution right now. Any chance of this O(N^2) iteration hurting us in the real world? Definitely not pretty, but couldn't think of a clean & safe alternative, so went with simple for a change. Fortunately, it is more or less a one-time conversion, operating on a shared buffer between normally 2 clients. However, they will be full-ppgtt or we would not have multiple vma, and so not affected. Single client multiple vma case would be partial vma. That could be a much larger list... But on the affected machines, such vma would already be in the correct cache regime and so not need rebinding. I don't foresee (or have seen) anybody spinning here, so I am quite happy to punt the problem until someone complains. At least I think I can write a test case to have lots of partial vma, but not necessarily able to trigger the restart. That would require careful control of fragmentation with the ggtt... But mixing gtt faults of a bunch of small objects and different partials of a large should be enough... Or object sharing. But yeah, I agree that it feels OK to leave that for later if it ever surfaces. Reviewed-by: Tvrtko Ursulin Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 47/64] drm/i915: Be more careful when unbinding vma
On Tue, Jul 12, 2016 at 04:04:57PM +0100, Tvrtko Ursulin wrote: > >@@ -3463,11 +3483,18 @@ int i915_gem_object_set_cache_level(struct > >drm_i915_gem_object *obj, > > return -EBUSY; > > } > > > >-if (!i915_gem_valid_gtt_space(vma, cache_level)) { > >-ret = i915_vma_unbind(vma); > >-if (ret) > >-return ret; > >-} > >+if (i915_gem_valid_gtt_space(vma, cache_level)) > >+continue; > >+ > >+ret = i915_vma_unbind(vma); > >+if (ret) > >+return ret; > >+ > >+/* As unbinding may affect other elements in the > >+ * obj->vma_list (due to side-effects from retiring > >+ * an active vma), play safe and restart the iterator. > >+ */ > >+goto restart; > > } > > Does not look efficient for long lists but I don't see a solution > right now. Any chance of this O(N^2) iteration hurting us in the > real world? Definitely not pretty, but couldn't think of a clean & safe alternative, so went with simple for a change. Fortunately, it is more or less a one-time conversion, operating on a shared buffer between normally 2 clients. However, they will be full-ppgtt or we would not have multiple vma, and so not affected. Single client multiple vma case would be partial vma. That could be a much larger list... But on the affected machines, such vma would already be in the correct cache regime and so not need rebinding. I don't foresee (or have seen) anybody spinning here, so I am quite happy to punt the problem until someone complains. At least I think I can write a test case to have lots of partial vma, but not necessarily able to trigger the restart. That would require careful control of fragmentation with the ggtt... But mixing gtt faults of a bunch of small objects and different partials of a large should be enough... -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 47/64] drm/i915: Be more careful when unbinding vma
On 07/07/16 09:41, Chris Wilson wrote: When we call i915_vma_unbind(), we will wait upon outstanding rendering. This will also trigger a retirement phase, which may update the object lists. If, we extend request tracking to the VMA itself (rather than keep it at the encompassing object), then there is a potential that the obj->vma_list be modified for other elements upon i915_vma_unbind(). As a result, if we walk over the object list and call i915_vma_unbind(), we need to be prepared for that list to change. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 57 +++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 7 +--- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-- 4 files changed, 46 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 633585054669..27e1182544a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,6 +3032,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. 
*/ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c6816f9969d5..28a3079a7892 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -283,18 +283,38 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + LIST_HEAD(still_in_list); + int ret; + + /* The vma will only be freed if it is marked as closed, and if we wait +* upon rendering to the vma, we may unbind anything in the list. +*/ + while ((vma = list_first_entry_or_null(&obj->vma_list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) + break; + } + list_splice(&still_in_list, &obj->vma_list); + + return ret; +} + static int drop_pages(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; int ret; i915_gem_object_get(obj); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - - ret = i915_gem_object_put_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = i915_gem_object_put_pages(obj); i915_gem_object_put(obj); return ret; @@ -3442,8 +3462,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -3454,7 +3473,8 @@ int 
i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3463,11 +3483,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the +* obj->vma_list (due to side-effects from retiring +* an active vma), play safe and restart the iterator. +*/ + goto restart; } Does not look efficient for long lists but I don't see a solution right now. Any chance of this O(N^2) iteration hurting us in the real
[Intel-gfx] [PATCH 47/64] drm/i915: Be more careful when unbinding vma
When we call i915_vma_unbind(), we will wait upon outstanding rendering. This will also trigger a retirement phase, which may update the object lists. If, we extend request tracking to the VMA itself (rather than keep it at the encompassing object), then there is a potential that the obj->vma_list be modified for other elements upon i915_vma_unbind(). As a result, if we walk over the object list and call i915_vma_unbind(), we need to be prepared for that list to change. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/i915_gem.c | 57 +++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 7 +--- drivers/gpu/drm/i915/i915_gem_userptr.c | 4 +-- 4 files changed, 46 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 633585054669..27e1182544a2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3032,6 +3032,8 @@ int __must_check i915_vma_unbind(struct i915_vma *vma); * _guarantee_ VMA in question is _not in use_ anywhere. */ int __must_check __i915_vma_unbind_no_wait(struct i915_vma *vma); + +int i915_gem_object_unbind(struct drm_i915_gem_object *obj); int i915_gem_object_put_pages(struct drm_i915_gem_object *obj); void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv); void i915_gem_release_mmap(struct drm_i915_gem_object *obj); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c6816f9969d5..28a3079a7892 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -283,18 +283,38 @@ static const struct drm_i915_gem_object_ops i915_gem_phys_ops = { .release = i915_gem_object_release_phys, }; +int +i915_gem_object_unbind(struct drm_i915_gem_object *obj) +{ + struct i915_vma *vma; + LIST_HEAD(still_in_list); + int ret; + + /* The vma will only be freed if it is marked as closed, and if we wait +* upon rendering to the vma, we may unbind anything in the list. 
+*/ + while ((vma = list_first_entry_or_null(&obj->vma_list, + struct i915_vma, + obj_link))) { + list_move_tail(&vma->obj_link, &still_in_list); + ret = i915_vma_unbind(vma); + if (ret) + break; + } + list_splice(&still_in_list, &obj->vma_list); + + return ret; +} + static int drop_pages(struct drm_i915_gem_object *obj) { - struct i915_vma *vma, *next; int ret; i915_gem_object_get(obj); - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) - if (i915_vma_unbind(vma)) - break; - - ret = i915_gem_object_put_pages(obj); + ret = i915_gem_object_unbind(obj); + if (ret == 0) + ret = i915_gem_object_put_pages(obj); i915_gem_object_put(obj); return ret; @@ -3442,8 +3462,7 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write) int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, enum i915_cache_level cache_level) { - struct drm_device *dev = obj->base.dev; - struct i915_vma *vma, *next; + struct i915_vma *vma; int ret = 0; if (obj->cache_level == cache_level) @@ -3454,7 +3473,8 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, * catch the issue of the CS prefetch crossing page boundaries and * reading an invalid PTE on older architectures. */ - list_for_each_entry_safe(vma, next, &obj->vma_list, obj_link) { +restart: + list_for_each_entry(vma, &obj->vma_list, obj_link) { if (!drm_mm_node_allocated(&vma->node)) continue; @@ -3463,11 +3483,18 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, return -EBUSY; } - if (!i915_gem_valid_gtt_space(vma, cache_level)) { - ret = i915_vma_unbind(vma); - if (ret) - return ret; - } + if (i915_gem_valid_gtt_space(vma, cache_level)) + continue; + + ret = i915_vma_unbind(vma); + if (ret) + return ret; + + /* As unbinding may affect other elements in the +* obj->vma_list (due to side-effects from retiring +* an active vma), play safe and restart the iterator. 
+*/ + goto restart; } /* We can reuse the existing drm_mm nodes but need to change the @@ -3486,7 +3513,7 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj, if (ret)