Inject probe errors into intel_uc_init_hw to make sure we
correctly handle any uC initialization failure.

To avoid complaints from CI about injected errors, use
i915_probe_error to lower the message level.

v2: use _sanitize instead of _reset to correctly handle Gen9 retries

Signed-off-by: Michal Wajdeczko <michal.wajdec...@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Reviewed-by: Chris Wilson <ch...@chris-wilson.co.uk> #v1
---
 drivers/gpu/drm/i915/gt/uc/intel_huc.c   |  4 ++++
 drivers/gpu/drm/i915/gt/uc/intel_uc.c    | 24 ++++++++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c |  5 +++++
 drivers/gpu/drm/i915/i915_gem.c          |  2 +-
 4 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index c9535caba844..a696ce0fec62 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -139,6 +139,10 @@ int intel_huc_auth(struct intel_huc *huc)
        GEM_BUG_ON(!intel_uc_fw_is_loaded(&huc->fw));
        GEM_BUG_ON(intel_huc_is_authenticated(huc));
 
+       ret = i915_inject_load_error(gt->i915, -ENXIO);
+       if (ret)
+               goto fail;
+
        ret = intel_guc_auth_huc(guc,
                                 intel_guc_ggtt_offset(guc, huc->rsa_data));
        if (ret) {
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 66b226be6759..db16eef7795d 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -41,6 +41,10 @@ static int __intel_uc_reset_hw(struct intel_uc *uc)
        int ret;
        u32 guc_status;
 
+       ret = i915_inject_load_error(gt->i915, -ENXIO);
+       if (ret)
+               return ret;
+
        ret = intel_reset_guc(gt);
        if (ret) {
                DRM_ERROR("Failed to reset GuC, ret = %d\n", ret);
@@ -245,6 +249,10 @@ static int guc_enable_communication(struct intel_guc *guc)
 
        GEM_BUG_ON(guc_communication_enabled(guc));
 
+       ret = i915_inject_load_error(i915, -ENXIO);
+       if (ret)
+               return ret;
+
        ret = intel_guc_ct_enable(&guc->ct);
        if (ret)
                return ret;
@@ -376,7 +384,7 @@ void intel_uc_fini(struct intel_uc *uc)
        intel_guc_fini(guc);
 }
 
-static void __uc_sanitize(struct intel_uc *uc)
+static int __uc_sanitize(struct intel_uc *uc)
 {
        struct intel_guc *guc = &uc->guc;
        struct intel_huc *huc = &uc->huc;
@@ -386,7 +394,7 @@ static void __uc_sanitize(struct intel_uc *uc)
        intel_huc_sanitize(huc);
        intel_guc_sanitize(guc);
 
-       __intel_uc_reset_hw(uc);
+       return __intel_uc_reset_hw(uc);
 }
 
 void intel_uc_sanitize(struct intel_uc *uc)
@@ -414,6 +422,10 @@ static int uc_init_wopcm(struct intel_uc *uc)
        GEM_BUG_ON(!(size & GUC_WOPCM_SIZE_MASK));
        GEM_BUG_ON(size & ~GUC_WOPCM_SIZE_MASK);
 
+       err = i915_inject_load_error(gt->i915, -ENXIO);
+       if (err)
+               return err;
+
        mask = GUC_WOPCM_SIZE_MASK | GUC_WOPCM_SIZE_LOCKED;
        err = intel_uncore_write_and_verify(uncore, GUC_WOPCM_SIZE, size, mask,
                                            size | GUC_WOPCM_SIZE_LOCKED);
@@ -470,7 +482,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
                 * Always reset the GuC just before (re)loading, so
                 * that the state and timing are fairly predictable
                 */
-               ret = __intel_uc_reset_hw(uc);
+               ret = __uc_sanitize(uc);
                if (ret)
                        goto err_out;
 
@@ -514,6 +526,10 @@ int intel_uc_init_hw(struct intel_uc *uc)
                        goto err_communication;
        }
 
+       ret = i915_inject_load_error(i915, -ENXIO);
+       if (ret)
+               goto err_communication;
+
        dev_info(i915->drm.dev, "GuC firmware version %u.%u\n",
                 guc->fw.major_ver_found, guc->fw.minor_ver_found);
        dev_info(i915->drm.dev, "GuC submission %s\n",
@@ -540,7 +556,7 @@ int intel_uc_init_hw(struct intel_uc *uc)
        if (GEM_WARN_ON(ret == -EIO))
                ret = -EINVAL;
 
-       dev_err(i915->drm.dev, "GuC initialization failed %d\n", ret);
+       i915_probe_error(i915, "GuC initialization failed %d\n", ret);
        return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index ac91e3efd02b..734b20bf635f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -424,8 +424,13 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, struct intel_gt *gt,
        /* make sure the status was cleared the last time we reset the uc */
        GEM_BUG_ON(intel_uc_fw_is_loaded(uc_fw));
 
+       err = i915_inject_load_error(gt->i915, -ENOEXEC);
+       if (err)
+               return err;
+
        if (!intel_uc_fw_is_available(uc_fw))
                return -ENOEXEC;
+
        /* Call custom loader */
        intel_uc_fw_ggtt_bind(uc_fw, gt);
        err = uc_fw_xfer(uc_fw, gt, wopcm_offset, dma_flags);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index bb2c991b49d7..54a10c8c4dff 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1243,7 +1243,7 @@ int i915_gem_init_hw(struct drm_i915_private *i915)
        /* We can't enable contexts until all firmware is loaded */
        ret = intel_uc_init_hw(&gt->uc);
        if (ret) {
-               DRM_ERROR("Enabling uc failed (%d)\n", ret);
+               i915_probe_error(i915, "Enabling uc failed (%d)\n", ret);
                goto out;
        }
 
-- 
2.19.2

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to