We currently delete the GuC logs if the firmware fails to load, but those
logs are still useful for understanding why the load failed. Instead of
deleting them, take a snapshot so that they can still be accessed after
driver load has completed.

Cc: Oscar Mateo <oscar.ma...@intel.com>
Cc: Michal Wajdeczko <michal.wajdec...@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospu...@intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c   | 36 ++++++++++++++++++++---------------
 drivers/gpu/drm/i915/i915_drv.c       |  3 +++
 drivers/gpu/drm/i915/i915_drv.h       |  6 ++++++
 drivers/gpu/drm/i915/i915_gpu_error.c | 36 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/intel_guc_fwif.h | 14 +++++++++++---
 drivers/gpu/drm/i915/intel_guc_log.c  | 10 ++--------
 drivers/gpu/drm/i915/intel_uc.c       |  7 +++++--
 7 files changed, 84 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 870c470..4ff20fc 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2543,26 +2543,32 @@ static int i915_guc_info(struct seq_file *m, void *data)
 static int i915_guc_log_dump(struct seq_file *m, void *data)
 {
        struct drm_i915_private *dev_priv = node_to_i915(m->private);
-       struct drm_i915_gem_object *obj;
-       int i = 0, pg;
-
-       if (!dev_priv->guc.log.vma)
+       u32 *log;
+       int i = 0;
+
+       if (dev_priv->guc.log.vma) {
+               log = i915_gem_object_pin_map(dev_priv->guc.log.vma->obj,
+                                             I915_MAP_WC);
+               if (IS_ERR(log)) {
+                       DRM_ERROR("Failed to pin guc_log vma\n");
+                       return PTR_ERR(log);
+               }
+       } else if (dev_priv->gpu_error.guc_load_fail_log) {
+               log = dev_priv->gpu_error.guc_load_fail_log;
+       } else {
                return 0;
-
-       obj = dev_priv->guc.log.vma->obj;
-       for (pg = 0; pg < obj->base.size / PAGE_SIZE; pg++) {
-               u32 *log = kmap_atomic(i915_gem_object_get_page(obj, pg));
-
-               for (i = 0; i < PAGE_SIZE / sizeof(u32); i += 4)
-                       seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
-                                  *(log + i), *(log + i + 1),
-                                  *(log + i + 2), *(log + i + 3));
-
-               kunmap_atomic(log);
        }
 
+       for (i = 0; i < GUC_LOG_SIZE / sizeof(u32); i += 4)
+               seq_printf(m, "0x%08x 0x%08x 0x%08x 0x%08x\n",
+                          *(log + i), *(log + i + 1),
+                          *(log + i + 2), *(log + i + 3));
+
        seq_putc(m, '\n');
 
+       if (dev_priv->guc.log.vma)
+               i915_gem_object_unpin_map(dev_priv->guc.log.vma->obj);
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 452c265..c7cb36c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -1354,6 +1354,9 @@ void i915_driver_unload(struct drm_device *dev)
        cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
        i915_reset_error_state(dev_priv);
 
+       /* release GuC error log (if any) */
+       i915_guc_load_error_log_free(dev_priv);
+
        /* Flush any outstanding unpin_work. */
        drain_workqueue(dev_priv->wq);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4588b3e..761c663 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1555,6 +1555,9 @@ struct i915_gpu_error {
        /* Protected by the above dev->gpu_error.lock. */
        struct i915_gpu_state *first_error;
 
+       /* Log snapshot if GuC errors during load */
+       void *guc_load_fail_log;
+
        unsigned long missed_irq_rings;
 
        /**
@@ -3687,6 +3690,9 @@ static inline void i915_reset_error_state(struct drm_i915_private *i915)
 
 #endif
 
+void i915_guc_load_error_log_capture(struct drm_i915_private *i915);
+void i915_guc_load_error_log_free(struct drm_i915_private *i915);
+
 const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index ec526d9..44a873b 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1809,3 +1809,39 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 
        i915_gpu_state_put(error);
 }
+
+/*
+ * Snapshot the GuC log into gpu_error.guc_load_fail_log so it can still be
+ * read after a failed load; no-op if a snapshot exists or there is no log vma.
+ */
+void i915_guc_load_error_log_capture(struct drm_i915_private *i915)
+{
+       void *log, *buf;
+       struct i915_vma *vma = i915->guc.log.vma;
+
+       if (i915->gpu_error.guc_load_fail_log || !vma)
+               return;
+
+       /*
+        * the vma should be already pinned and mapped for log runtime
+        * management but let's play safe
+        */
+       log = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);
+       if (IS_ERR(log)) {
+               DRM_ERROR("Failed to pin guc_log vma\n");
+               return;
+       }
+
+       buf = kmemdup(log, GUC_LOG_SIZE, GFP_KERNEL);
+       if (!buf)
+               DRM_ERROR("Failed to copy guc log\n");
+       i915->gpu_error.guc_load_fail_log = buf;
+
+       i915_gem_object_unpin_map(vma->obj);
+}
+
+void i915_guc_load_error_log_free(struct drm_i915_private *i915)
+{
+       kfree(i915->gpu_error.guc_load_fail_log);
+       i915->gpu_error.guc_load_fail_log = NULL; /* allow a fresh capture */
+}
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 6156845..be83be0 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -77,6 +77,17 @@
 #define GUC_STAGE_DESC_ATTR_PCH                BIT(6)
 #define GUC_STAGE_DESC_ATTR_TERMINATED BIT(7)
 
+/*
+ * GuC log buffer size: one state page, plus one extra page per buffer in case
+ * of overlap. NOTE(review): *_PAGES keep their old 1/7/7 values since they look
+ * like they are packed via GUC_LOG_*_SHIFT, where 8 would not fit - confirm.
+ */
+#define GUC_LOG_CRASH_PAGES    1
+#define GUC_LOG_DPC_PAGES      7
+#define GUC_LOG_ISR_PAGES      7
+#define GUC_LOG_SIZE ((1 + GUC_LOG_CRASH_PAGES + 1 + GUC_LOG_DPC_PAGES + 1 + \
+                      GUC_LOG_ISR_PAGES + 1) << PAGE_SHIFT)
+
 /* The guc control data is 10 DWORDs */
 #define GUC_CTL_CTXINFO                        0
 #define   GUC_CTL_CTXNUM_IN16_SHIFT    0
@@ -93,11 +104,8 @@
 #define   GUC_LOG_VALID                        (1 << 0)
 #define   GUC_LOG_NOTIFY_ON_HALF_FULL  (1 << 1)
 #define   GUC_LOG_ALLOC_IN_MEGABYTE    (1 << 3)
-#define   GUC_LOG_CRASH_PAGES          1
 #define   GUC_LOG_CRASH_SHIFT          4
-#define   GUC_LOG_DPC_PAGES            7
 #define   GUC_LOG_DPC_SHIFT            6
-#define   GUC_LOG_ISR_PAGES            7
 #define   GUC_LOG_ISR_SHIFT            9
 #define   GUC_LOG_BUF_ADDR_SHIFT       12
 
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 16d3b87..b357da3 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -524,7 +524,7 @@ int intel_guc_log_create(struct intel_guc *guc)
 {
        struct i915_vma *vma;
        unsigned long offset;
-       uint32_t size, flags;
+       uint32_t flags;
        int ret;
 
        GEM_BUG_ON(guc->log.vma);
@@ -532,12 +532,6 @@ int intel_guc_log_create(struct intel_guc *guc)
        if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
                i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
 
-       /* The first page is to save log buffer state. Allocate one
-        * extra page for others in case for overlap */
-       size = (1 + GUC_LOG_DPC_PAGES + 1 +
-               GUC_LOG_ISR_PAGES + 1 +
-               GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
-
        /* We require SSE 4.1 for fast reads from the GuC log buffer and
         * it should be present on the chipsets supporting GuC based
         * submisssions.
@@ -547,7 +541,7 @@ int intel_guc_log_create(struct intel_guc *guc)
                goto err;
        }
 
-       vma = intel_guc_allocate_vma(guc, size);
+       vma = intel_guc_allocate_vma(guc, GUC_LOG_SIZE);
        if (IS_ERR(vma)) {
                ret = PTR_ERR(vma);
                goto err;
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index 7fd75ca..64f2314 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -274,6 +274,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
        guc_disable_communication(guc);
        gen9_reset_guc_interrupts(dev_priv);
+       i915_guc_load_error_log_free(dev_priv);
 
        /* We need to notify the guc whenever we change the GGTT */
        i915_ggtt_enable_guc(dev_priv);
@@ -320,11 +321,11 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
        /* Did we succeded or run out of retries? */
        if (ret)
-               goto err_submission;
+               goto log_capture;
 
        ret = guc_enable_communication(guc);
        if (ret)
-               goto err_submission;
+               goto log_capture;
 
        intel_guc_auth_huc(dev_priv);
        if (i915.enable_guc_submission) {
@@ -350,6 +351,8 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 err_interrupts:
        guc_disable_communication(guc);
        gen9_disable_guc_interrupts(dev_priv);
+log_capture:
+       i915_guc_load_error_log_capture(dev_priv);
 err_submission:
        if (i915.enable_guc_submission)
                i915_guc_submission_fini(dev_priv);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to