Re: [Intel-gfx] [PATCH 3/4] drm/i915/guc: Fix GuC HW/SW state cleanup in unload path
On Fri, Sep 01, 2017 at 11:02:11AM +0530, Sagar Arun Kamble wrote: > Teardown of GuC HW/SW state was not properly done in unload path. > During unload, we can rely on intel_guc_reset_prepare being done > as part of i915_gem_suspend for disabling GuC interfaces. > We will have to disable GuC submission prior to suspend as that involves > communication with GuC to destroy doorbell. So intel_uc_fini_hw has to > be called as part of i915_gem_suspend during unload as that really > takes care of finishing the GuC operations. Created new parameter for > i915_gem_suspend to handle unload/suspend path w.r.t gem and GuC suspend. > GuC related allocations are cleaned up as part of intel_uc_cleanup_hw. > > v2: Prepared i915_gem_unload. (Michal) > > Cc: Chris Wilson> Cc: Michal Wajdeczko > Cc: Daniele Ceraolo Spurio > Signed-off-by: Sagar Arun Kamble > --- > drivers/gpu/drm/i915/i915_drv.c| 6 +-- > drivers/gpu/drm/i915/i915_drv.h| 1 + > drivers/gpu/drm/i915/i915_gem.c| 79 > ++ > drivers/gpu/drm/i915/intel_guc.c | 13 ++ > drivers/gpu/drm/i915/intel_guc.h | 1 + > drivers/gpu/drm/i915/intel_uc.c| 14 +++--- > drivers/gpu/drm/i915/intel_uc_common.h | 1 + > 7 files changed, 103 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index b2e8f95..b6cc2fe 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -601,7 +601,7 @@ static void i915_gem_fini(struct drm_i915_private > *dev_priv) > i915_gem_drain_workqueue(dev_priv); > > mutex_lock(_priv->drm.struct_mutex); > - intel_uc_fini_hw(dev_priv); > + intel_uc_cleanup_hw(dev_priv); > i915_gem_cleanup_engines(dev_priv); > i915_gem_contexts_fini(dev_priv); > i915_gem_cleanup_userptr(dev_priv); > @@ -682,7 +682,7 @@ static int i915_load_modeset_init(struct drm_device *dev) > return 0; > > cleanup_gem: > - if (i915_gem_suspend(dev_priv)) > + if (i915_gem_unload(dev_priv)) > DRM_ERROR("failed to idle hardware; continuing to unload!\n"); > 
i915_gem_fini(dev_priv); > cleanup_uc: > @@ -1375,7 +1375,7 @@ void i915_driver_unload(struct drm_device *dev) > > i915_driver_unregister(dev_priv); > > - if (i915_gem_suspend(dev_priv)) > + if (i915_gem_unload(dev_priv)) > DRM_ERROR("failed to idle hardware; continuing to unload!\n"); > > intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 064bf0f..570e71e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3628,6 +3628,7 @@ void i915_gem_reset_engine(struct intel_engine_cs > *engine, > int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, > unsigned int flags); > int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); > +int __must_check i915_gem_unload(struct drm_i915_private *dev_priv); > void i915_gem_resume(struct drm_i915_private *dev_priv); > int i915_gem_fault(struct vm_fault *vmf); > int i915_gem_object_wait(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 977500f..24cefe9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4624,6 +4624,85 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) > return ret; > } > > +int i915_gem_unload(struct drm_i915_private *dev_priv) > +{ > + struct drm_device *dev = _priv->drm; > + int ret; > + > + intel_runtime_pm_get(dev_priv); > + intel_suspend_gt_powersave(dev_priv); > + > + mutex_lock(>struct_mutex); > + > + /* We have to flush all the executing contexts to main memory so > + * that they can saved in the hibernation image. To ensure the last > + * context image is coherent, we have to switch away from it. That > + * leaves the dev_priv->kernel_context still active when > + * we actually suspend, and its image in memory may not match the GPU > + * state. Fortunately, the kernel_context is disposable and we do > + * not rely on its state. 
> + */ > + ret = i915_gem_switch_to_kernel_context(dev_priv); > + if (ret) > + goto err_unlock; > + > + ret = i915_gem_wait_for_idle(dev_priv, > + I915_WAIT_INTERRUPTIBLE | > + I915_WAIT_LOCKED); > + if (ret) > + goto err_unlock; > + > + assert_kernel_context_is_current(dev_priv); > + i915_gem_contexts_lost(dev_priv); > + mutex_unlock(&dev->struct_mutex); > + > + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); > +
Re: [Intel-gfx] [PATCH 3/4] drm/i915/guc: Fix GuC HW/SW state cleanup in unload path
On 9/5/2017 8:24 PM, Michał Winiarski wrote: On Fri, Sep 01, 2017 at 11:02:11AM +0530, Sagar Arun Kamble wrote: Teardown of GuC HW/SW state was not properly done in unload path. During unload, we can rely on intel_guc_reset_prepare being done as part of i915_gem_suspend for disabling GuC interfaces. We will have to disable GuC submission prior to suspend as that involves communication with GuC to destroy doorbell. So intel_uc_fini_hw has to be called as part of i915_gem_suspend during unload as that really takes care of finishing the GuC operations. Created new parameter for i915_gem_suspend to handle unload/suspend path w.r.t gem and GuC suspend. GuC related allocations are cleaned up as part of intel_uc_cleanup_hw. Is this still accurate description? After changes from v2? No. Sorry. Will update this in the new version. v2: Prepared i915_gem_unload. (Michal) Cc: Chris WilsonCc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Signed-off-by: Sagar Arun Kamble --- drivers/gpu/drm/i915/i915_drv.c| 6 +-- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem.c| 79 ++ drivers/gpu/drm/i915/intel_guc.c | 13 ++ drivers/gpu/drm/i915/intel_guc.h | 1 + drivers/gpu/drm/i915/intel_uc.c| 14 +++--- drivers/gpu/drm/i915/intel_uc_common.h | 1 + 7 files changed, 103 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b2e8f95..b6cc2fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -601,7 +601,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) i915_gem_drain_workqueue(dev_priv); mutex_lock(_priv->drm.struct_mutex); - intel_uc_fini_hw(dev_priv); + intel_uc_cleanup_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); i915_gem_cleanup_userptr(dev_priv); @@ -682,7 +682,7 @@ static int i915_load_modeset_init(struct drm_device *dev) return 0; cleanup_gem: - if (i915_gem_suspend(dev_priv)) + if (i915_gem_unload(dev_priv)) 
DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); cleanup_uc: @@ -1375,7 +1375,7 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_unregister(dev_priv); - if (i915_gem_suspend(dev_priv)) + if (i915_gem_unload(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 064bf0f..570e71e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3628,6 +3628,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); +int __must_check i915_gem_unload(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_fault *vmf); int i915_gem_object_wait(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 977500f..24cefe9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4624,6 +4624,85 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) return ret; } +int i915_gem_unload(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = _priv->drm; + int ret; + + intel_runtime_pm_get(dev_priv); + intel_suspend_gt_powersave(dev_priv); + + mutex_lock(>struct_mutex); + + /* We have to flush all the executing contexts to main memory so +* that they can saved in the hibernation image. To ensure the last +* context image is coherent, we have to switch away from it. That +* leaves the dev_priv->kernel_context still active when +* we actually suspend, and its image in memory may not match the GPU +* state. Fortunately, the kernel_context is disposable and we do +* not rely on its state. 
+*/ + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err_unlock; + + ret = i915_gem_wait_for_idle(dev_priv, +I915_WAIT_INTERRUPTIBLE | +I915_WAIT_LOCKED); + if (ret) + goto err_unlock; + + assert_kernel_context_is_current(dev_priv); + i915_gem_contexts_lost(dev_priv); + mutex_unlock(&dev->struct_mutex); + + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); +
Re: [Intel-gfx] [PATCH 3/4] drm/i915/guc: Fix GuC HW/SW state cleanup in unload path
On Fri, Sep 01, 2017 at 11:02:11AM +0530, Sagar Arun Kamble wrote: > Teardown of GuC HW/SW state was not properly done in unload path. > During unload, we can rely on intel_guc_reset_prepare being done > as part of i915_gem_suspend for disabling GuC interfaces. > We will have to disable GuC submission prior to suspend as that involves > communication with GuC to destroy doorbell. So intel_uc_fini_hw has to > be called as part of i915_gem_suspend during unload as that really > takes care of finishing the GuC operations. Created new parameter for > i915_gem_suspend to handle unload/suspend path w.r.t gem and GuC suspend. > GuC related allocations are cleaned up as part of intel_uc_cleanup_hw. Is this still accurate description? After changes from v2? > > v2: Prepared i915_gem_unload. (Michal) > > Cc: Chris Wilson> Cc: Michal Wajdeczko > Cc: Daniele Ceraolo Spurio > Signed-off-by: Sagar Arun Kamble > --- > drivers/gpu/drm/i915/i915_drv.c| 6 +-- > drivers/gpu/drm/i915/i915_drv.h| 1 + > drivers/gpu/drm/i915/i915_gem.c| 79 > ++ > drivers/gpu/drm/i915/intel_guc.c | 13 ++ > drivers/gpu/drm/i915/intel_guc.h | 1 + > drivers/gpu/drm/i915/intel_uc.c| 14 +++--- > drivers/gpu/drm/i915/intel_uc_common.h | 1 + > 7 files changed, 103 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index b2e8f95..b6cc2fe 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -601,7 +601,7 @@ static void i915_gem_fini(struct drm_i915_private > *dev_priv) > i915_gem_drain_workqueue(dev_priv); > > mutex_lock(_priv->drm.struct_mutex); > - intel_uc_fini_hw(dev_priv); > + intel_uc_cleanup_hw(dev_priv); > i915_gem_cleanup_engines(dev_priv); > i915_gem_contexts_fini(dev_priv); > i915_gem_cleanup_userptr(dev_priv); > @@ -682,7 +682,7 @@ static int i915_load_modeset_init(struct drm_device *dev) > return 0; > > cleanup_gem: > - if (i915_gem_suspend(dev_priv)) > + if (i915_gem_unload(dev_priv)) > 
DRM_ERROR("failed to idle hardware; continuing to unload!\n"); > i915_gem_fini(dev_priv); > cleanup_uc: > @@ -1375,7 +1375,7 @@ void i915_driver_unload(struct drm_device *dev) > > i915_driver_unregister(dev_priv); > > - if (i915_gem_suspend(dev_priv)) > + if (i915_gem_unload(dev_priv)) > DRM_ERROR("failed to idle hardware; continuing to unload!\n"); > > intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index 064bf0f..570e71e 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -3628,6 +3628,7 @@ void i915_gem_reset_engine(struct intel_engine_cs > *engine, > int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, > unsigned int flags); > int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); > +int __must_check i915_gem_unload(struct drm_i915_private *dev_priv); > void i915_gem_resume(struct drm_i915_private *dev_priv); > int i915_gem_fault(struct vm_fault *vmf); > int i915_gem_object_wait(struct drm_i915_gem_object *obj, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index 977500f..24cefe9 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -4624,6 +4624,85 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) > return ret; > } > > +int i915_gem_unload(struct drm_i915_private *dev_priv) > +{ > + struct drm_device *dev = _priv->drm; > + int ret; > + > + intel_runtime_pm_get(dev_priv); > + intel_suspend_gt_powersave(dev_priv); > + > + mutex_lock(>struct_mutex); > + > + /* We have to flush all the executing contexts to main memory so > + * that they can saved in the hibernation image. To ensure the last > + * context image is coherent, we have to switch away from it. That > + * leaves the dev_priv->kernel_context still active when > + * we actually suspend, and its image in memory may not match the GPU > + * state. 
Fortunately, the kernel_context is disposable and we do > + * not rely on its state. > + */ > + ret = i915_gem_switch_to_kernel_context(dev_priv); > + if (ret) > + goto err_unlock; > + > + ret = i915_gem_wait_for_idle(dev_priv, > + I915_WAIT_INTERRUPTIBLE | > + I915_WAIT_LOCKED); > + if (ret) > + goto err_unlock; > + > + assert_kernel_context_is_current(dev_priv); > + i915_gem_contexts_lost(dev_priv); > + mutex_unlock(&dev->struct_mutex); > + > +
[Intel-gfx] [PATCH 3/4] drm/i915/guc: Fix GuC HW/SW state cleanup in unload path
Teardown of GuC HW/SW state was not properly done in unload path. During unload, we can rely on intel_guc_reset_prepare being done as part of i915_gem_suspend for disabling GuC interfaces. We will have to disable GuC submission prior to suspend as that involves communication with GuC to destroy doorbell. So intel_uc_fini_hw has to be called as part of i915_gem_suspend during unload as that really takes care of finishing the GuC operations. Created new parameter for i915_gem_suspend to handle unload/suspend path w.r.t gem and GuC suspend. GuC related allocations are cleaned up as part of intel_uc_cleanup_hw. v2: Prepared i915_gem_unload. (Michal) Cc: Chris WilsonCc: Michal Wajdeczko Cc: Daniele Ceraolo Spurio Signed-off-by: Sagar Arun Kamble --- drivers/gpu/drm/i915/i915_drv.c| 6 +-- drivers/gpu/drm/i915/i915_drv.h| 1 + drivers/gpu/drm/i915/i915_gem.c| 79 ++ drivers/gpu/drm/i915/intel_guc.c | 13 ++ drivers/gpu/drm/i915/intel_guc.h | 1 + drivers/gpu/drm/i915/intel_uc.c| 14 +++--- drivers/gpu/drm/i915/intel_uc_common.h | 1 + 7 files changed, 103 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index b2e8f95..b6cc2fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -601,7 +601,7 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) i915_gem_drain_workqueue(dev_priv); mutex_lock(_priv->drm.struct_mutex); - intel_uc_fini_hw(dev_priv); + intel_uc_cleanup_hw(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); i915_gem_cleanup_userptr(dev_priv); @@ -682,7 +682,7 @@ static int i915_load_modeset_init(struct drm_device *dev) return 0; cleanup_gem: - if (i915_gem_suspend(dev_priv)) + if (i915_gem_unload(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); i915_gem_fini(dev_priv); cleanup_uc: @@ -1375,7 +1375,7 @@ void i915_driver_unload(struct drm_device *dev) i915_driver_unregister(dev_priv); - if 
(i915_gem_suspend(dev_priv)) + if (i915_gem_unload(dev_priv)) DRM_ERROR("failed to idle hardware; continuing to unload!\n"); intel_display_power_get(dev_priv, POWER_DOMAIN_INIT); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 064bf0f..570e71e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3628,6 +3628,7 @@ void i915_gem_reset_engine(struct intel_engine_cs *engine, int i915_gem_wait_for_idle(struct drm_i915_private *dev_priv, unsigned int flags); int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv); +int __must_check i915_gem_unload(struct drm_i915_private *dev_priv); void i915_gem_resume(struct drm_i915_private *dev_priv); int i915_gem_fault(struct vm_fault *vmf); int i915_gem_object_wait(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 977500f..24cefe9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4624,6 +4624,85 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv) return ret; } +int i915_gem_unload(struct drm_i915_private *dev_priv) +{ + struct drm_device *dev = _priv->drm; + int ret; + + intel_runtime_pm_get(dev_priv); + intel_suspend_gt_powersave(dev_priv); + + mutex_lock(>struct_mutex); + + /* We have to flush all the executing contexts to main memory so +* that they can saved in the hibernation image. To ensure the last +* context image is coherent, we have to switch away from it. That +* leaves the dev_priv->kernel_context still active when +* we actually suspend, and its image in memory may not match the GPU +* state. Fortunately, the kernel_context is disposable and we do +* not rely on its state. 
+*/ + ret = i915_gem_switch_to_kernel_context(dev_priv); + if (ret) + goto err_unlock; + + ret = i915_gem_wait_for_idle(dev_priv, +I915_WAIT_INTERRUPTIBLE | +I915_WAIT_LOCKED); + if (ret) + goto err_unlock; + + assert_kernel_context_is_current(dev_priv); + i915_gem_contexts_lost(dev_priv); + mutex_unlock(&dev->struct_mutex); + + cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work); + cancel_delayed_work_sync(&dev_priv->gt.retire_work); + + /* As the idle_work is rearming if it detects a race, play safe and +* repeat the flush until it is definitely idle. +*/ + while