Re: [Intel-gfx] [PATCH v2] drm/i915: Restrict pagefault disabling to just around copy_from_user()
On 18/10/2016 10:07, Chris Wilson wrote: When handling execbuf relocations, we play a delicate dance with pagefault. We first try to access the user pages underneath our struct_mutex. However, if those pages were inside a GEM object, we may trigger a pagefault and deadlock as i915_gem_fault() tries to recursively acquire struct_mutex. Instead, we choose to disable pagefaulting around the copy_from_user whilst inside the struct_mutex and handle the EFAULT by falling back to a copy outside the struct_mutex. We however presumed that disabling pagefaults would be expensive. It is just an operation on the local current task. Cheap enough that we can restrict the disable/enable to the critical section around the copy, and so avoid having to handle the atomic sections within the relocation handling itself. v2: Just illustrate the broken error handling rather than argue why it is safer to ignore it, for now. Signed-off-by: Chris WilsonCc: Daniel Vetter Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 71 +++--- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 204536a3087e..e52affdcc125 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -551,20 +551,6 @@ relocate_entry(struct drm_i915_gem_object *obj, return 0; } -static bool object_is_idle(struct drm_i915_gem_object *obj) -{ - unsigned long active = i915_gem_object_get_active(obj); - int idx; - - for_each_active(active, idx) { - if (!i915_gem_active_is_idle(>last_read[idx], ->base.dev->struct_mutex)) - return false; - } - - return true; -} - static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -648,10 +634,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EINVAL; } - /* We can't wait for rendering with pagefaults disabled */ - if (pagefault_disabled() && !object_is_idle(obj)) - return -EFAULT; - ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; @@ -678,12 +660,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, remain = entry->relocation_count; while (remain) { struct drm_i915_gem_relocation_entry *r = stack_reloc; - int count = remain; - if (count > ARRAY_SIZE(stack_reloc)) - count = ARRAY_SIZE(stack_reloc); + unsigned long unwritten; + unsigned int count; + + count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); remain -= count; - if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { + /* This is the fast path and we cannot handle a pagefault +* whilst holding the struct mutex lest the user pass in the +* relocations contained within a mmaped bo. For in such a case +* we, the page fault handler would call i915_gem_fault() and +* we would try to acquire the struct mutex again. Obviously +* this is bad and so lockdep complains vehemently. +*/ + pagefault_disable(); + unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); + pagefault_enable(); + if (unlikely(unwritten)) { ret = -EFAULT; goto out; } @@ -695,11 +688,26 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, if (ret) goto out; - if (r->presumed_offset != offset && - __put_user(r->presumed_offset, - _relocs->presumed_offset)) { - ret = -EFAULT; - goto out; + if (r->presumed_offset != offset) { + pagefault_disable(); + unwritten = __put_user(r->presumed_offset, + _relocs->presumed_offset); + pagefault_enable(); + if (unlikely(unwritten)) { + /* Note that reporting an error now +* leaves everything in an inconsistent +* state as we have *already* changed +* the relocation value inside the +
[Intel-gfx] [PATCH v2] drm/i915: Restrict pagefault disabling to just around copy_from_user()
When handling execbuf relocations, we play a delicate dance with pagefault. We first try to access the user pages underneath our struct_mutex. However, if those pages were inside a GEM object, we may trigger a pagefault and deadlock as i915_gem_fault() tries to recursively acquire struct_mutex. Instead, we choose to disable pagefaulting around the copy_from_user whilst inside the struct_mutex and handle the EFAULT by falling back to a copy outside the struct_mutex. We however presumed that disabling pagefaults would be expensive. It is just an operation on the local current task. Cheap enough that we can restrict the disable/enable to the critical section around the copy, and so avoid having to handle the atomic sections within the relocation handling itself. v2: Just illustrate the broken error handling rather than argue why it is safer to ignore it, for now. Signed-off-by: Chris WilsonCc: Daniel Vetter Cc: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 71 +++--- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 204536a3087e..e52affdcc125 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -551,20 +551,6 @@ relocate_entry(struct drm_i915_gem_object *obj, return 0; } -static bool object_is_idle(struct drm_i915_gem_object *obj) -{ - unsigned long active = i915_gem_object_get_active(obj); - int idx; - - for_each_active(active, idx) { - if (!i915_gem_active_is_idle(>last_read[idx], ->base.dev->struct_mutex)) - return false; - } - - return true; -} - static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_vmas *eb, @@ -648,10 +634,6 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EINVAL; } - /* We can't wait for rendering with pagefaults disabled */ - if (pagefault_disabled() && !object_is_idle(obj)) - return -EFAULT; - ret = relocate_entry(obj, reloc, cache, target_offset); if (ret) return ret; @@ -678,12 +660,23 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, remain = entry->relocation_count; while (remain) { struct drm_i915_gem_relocation_entry *r = stack_reloc; - int count = remain; - if (count > ARRAY_SIZE(stack_reloc)) - count = ARRAY_SIZE(stack_reloc); + unsigned long unwritten; + unsigned int count; + + count = min_t(unsigned int, remain, ARRAY_SIZE(stack_reloc)); remain -= count; - if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0]))) { + /* This is the fast path and we cannot handle a pagefault +* whilst holding the struct mutex lest the user pass in the +* relocations contained within a mmaped bo. For in such a case +* we, the page fault handler would call i915_gem_fault() and +* we would try to acquire the struct mutex again. Obviously +* this is bad and so lockdep complains vehemently. +*/ + pagefault_disable(); + unwritten = __copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])); + pagefault_enable(); + if (unlikely(unwritten)) { ret = -EFAULT; goto out; } @@ -695,11 +688,26 @@ i915_gem_execbuffer_relocate_vma(struct i915_vma *vma, if (ret) goto out; - if (r->presumed_offset != offset && - __put_user(r->presumed_offset, - _relocs->presumed_offset)) { - ret = -EFAULT; - goto out; + if (r->presumed_offset != offset) { + pagefault_disable(); + unwritten = __put_user(r->presumed_offset, + _relocs->presumed_offset); + pagefault_enable(); + if (unlikely(unwritten)) { + /* Note that reporting an error now +* leaves everything in an inconsistent +* state as we have *already* changed +* the relocation value inside the +