Update the way drm_coredump_printer is used, based on its documentation
and Xe's code: the main idea is to format the complete dump once and
then use memcpy to return the chunks requested by the caller of
amdgpu_devcoredump_read.

The generation is moved to a separate worker thread.

This cuts the time to copy the dump from 40s to ~0s on my machine.

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  5 ++
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c  | 66 +++++++++++++++++--
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h  |  9 +++
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 057c8bd2ad89..ae81a428cfb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1200,6 +1200,11 @@ struct amdgpu_device {
 
        struct amdgpu_reset_domain      *reset_domain;
 
+#ifdef CONFIG_DEV_COREDUMP
+       /* If a coredump state capture is in progress don't start a new one. */
+       bool coredump_in_progress;
+#endif
+
        struct mutex                    benchmark_mutex;
 
        bool                            scpm_enabled;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 42a969512dcc..0808ca98ccd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
 }
 #else
 
+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
 const char *hw_ip_names[MAX_HWIP] = {
        [GC_HWIP]               = "GC",
        [HDP_HWIP]              = "HDP",
@@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct 
amdgpu_device *adev,
 }
 
 static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
-                       void *data, size_t datalen)
+amdgpu_devcoredump_format(char *buffer, size_t count, struct 
amdgpu_coredump_info *coredump)
 {
        struct drm_printer p;
-       struct amdgpu_coredump_info *coredump = data;
        struct drm_print_iterator iter;
        struct amdgpu_vm_fault_info *fault_info;
        struct amdgpu_ip_block *ip_block;
@@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t 
count,
 
        iter.data = buffer;
        iter.offset = 0;
-       iter.start = offset;
        iter.remain = count;
 
        p = drm_coredump_printer(&iter);
@@ -323,11 +322,60 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, 
size_t count,
        return count - iter.remain;
 }
 
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+                       void *data, size_t datalen)
+{
+       /* devcoredump read callback: copy up to @count bytes of the
+        * pre-formatted dump, starting at @offset, into @buffer.
+        * Returns the number of bytes copied, 0 once @offset is at or past
+        * the end of the formatted dump, or -ENODEV if there is nothing to read.
+        */
+       struct amdgpu_coredump_info *info = data;
+       loff_t left;
+       ssize_t n;
+
+       if (!info)
+               return -ENODEV;
+
+       /* Wait for the work item that fills info->formatted. */
+       flush_work(&info->work);
+
+       if (!info->formatted)
+               return -ENODEV;
+
+       if (offset >= info->formatted_size)
+               return 0;
+
+       /* Clamp the request to what remains after @offset. */
+       left = info->formatted_size - offset;
+       if (count < left)
+               n = count;
+       else
+               n = left;
+
+       memcpy(buffer, info->formatted + offset, n);
+
+       return n;
+}
+
 static void amdgpu_devcoredump_free(void *data)
 {
+       struct amdgpu_coredump_info *coredump = data;
+
+       /* devcoredump free callback: release the formatted text and the
+        * coredump info itself.
+        *
+        * This may be invoked from inside dev_coredumpm() (i.e. from the
+        * formatting worker) when the dump is discarded, so it must neither
+        * wait on coredump->work nor dereference coredump->adev.
+        */
+
+       /* formatted is allocated with kvzalloc(), so it needs kvfree(). */
+       kvfree(coredump->formatted);
        kfree(data);
 }
 
+static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
+{
+       struct amdgpu_coredump_info *coredump =
+               container_of(work, typeof(*coredump), work);
+       /* Cached: coredump may be gone once dev_coredumpm() returns. */
+       struct amdgpu_device *adev = coredump->adev;
+
+       /* Do a one-time preparation of the coredump output because
+        * repeatedly calling drm_coredump_printer is very slow.
+        * The first pass (NULL buffer) only computes the required size.
+        */
+       coredump->formatted_size =
+               amdgpu_devcoredump_format(NULL, AMDGPU_CORE_DUMP_SIZE_MAX,
+                                         coredump);
+       coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
+       if (!coredump->formatted) {
+               /* Not handed to devcoredump yet, so it is ours to free. */
+               kfree(coredump);
+               goto out;
+       }
+       amdgpu_devcoredump_format(coredump->formatted,
+                                 coredump->formatted_size, coredump);
+
+       /* Register the dump last: dev_coredumpm() takes ownership of
+        * coredump and may call amdgpu_devcoredump_free() before returning
+        * (e.g. when the dump is discarded), so coredump must not be
+        * touched after this call.
+        */
+       dev_coredumpm(adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+                     amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+out:
+       /* Always allow the next capture, including on allocation failure. */
+       adev->coredump_in_progress = false;
+}
+
 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
                     bool vram_lost, struct amdgpu_job *job)
 {
@@ -335,10 +383,15 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
        struct amdgpu_coredump_info *coredump;
        struct drm_sched_job *s_job;
 
+       if (adev->coredump_in_progress)
+               return;
+
        coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
        if (!coredump)
                return;
 
+       adev->coredump_in_progress = true;
+
        coredump->skip_vram_check = skip_vram_check;
        coredump->reset_vram_lost = vram_lost;
 
@@ -361,8 +414,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool 
skip_vram_check,
 
        ktime_get_ts64(&coredump->reset_time);
 
-       dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
-                     amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+       /* Kick off coredump formatting to a worker thread. */
+       INIT_WORK(&coredump->work, amdgpu_devcoredump_deferred_work);
+       queue_work(system_unbound_wq, &coredump->work);
 
        drm_info(dev, "AMDGPU device coredump file has been created\n");
        drm_info(dev, "Check your 
/sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..4c37a852b74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -35,9 +35,18 @@ struct amdgpu_coredump_info {
        struct amdgpu_device            *adev;
        struct amdgpu_task_info         reset_task_info;
        struct timespec64               reset_time;
+
+       struct work_struct work;
+
        bool                            skip_vram_check;
        bool                            reset_vram_lost;
        struct amdgpu_ring              *ring;
+       /* Readable form of the devcoredump, generated once to speed up
+        * reading it (see drm_coredump_printer's documentation).
+        */
+       ssize_t                         formatted_size;
+       char                            *formatted;
+
 };
 #endif
 
-- 
2.43.0

Reply via email to