Update the way drm_coredump_printer is used based on its documentation
and on Xe's code: the main idea is to generate the full dump in one go
and then use memcpy to return the chunks requested by the caller of
amdgpu_devcoredump_read.

The generation is moved to a separate worker thread. This cuts the time
to copy the dump from 40s to ~0s on my machine.

Signed-off-by: Pierre-Eric Pelloux-Prayer <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  5 ++
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c  | 66 +++++++++++++++++--
 .../gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h  |  9 +++
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 057c8bd2ad89..ae81a428cfb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1200,6 +1200,11 @@ struct amdgpu_device {
 
 	struct amdgpu_reset_domain	*reset_domain;
 
+#ifdef CONFIG_DEV_COREDUMP
+	/* If a coredump state capture is in progress, don't start a new one. */
+	bool				coredump_in_progress;
+#endif
+
 	struct mutex			benchmark_mutex;
 
 	bool				scpm_enabled;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
index 42a969512dcc..0808ca98ccd9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c
@@ -34,6 +34,8 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 }
 #else
 
+#define AMDGPU_CORE_DUMP_SIZE_MAX (256 * 1024 * 1024)
+
 const char *hw_ip_names[MAX_HWIP] = {
 	[GC_HWIP] =	"GC",
 	[HDP_HWIP] =	"HDP",
@@ -196,11 +198,9 @@ static void amdgpu_devcoredump_fw_info(struct amdgpu_device *adev,
 }
 
 static ssize_t
-amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
-			void *data, size_t datalen)
+amdgpu_devcoredump_format(char *buffer, size_t count, struct amdgpu_coredump_info *coredump)
 {
 	struct drm_printer p;
-	struct amdgpu_coredump_info *coredump = data;
 	struct drm_print_iterator iter;
 	struct amdgpu_vm_fault_info *fault_info;
 	struct amdgpu_ip_block *ip_block;
@@ -208,7 +208,6 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 
 	iter.data = buffer;
 	iter.offset = 0;
-	iter.start = offset;
 	iter.remain = count;
 
 	p = drm_coredump_printer(&iter);
@@ -323,11 +322,60 @@ amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
 	return count - iter.remain;
 }
 
+static ssize_t
+amdgpu_devcoredump_read(char *buffer, loff_t offset, size_t count,
+			void *data, size_t datalen)
+{
+	struct amdgpu_coredump_info *coredump = data;
+	ssize_t byte_copied;
+
+	if (!coredump)
+		return -ENODEV;
+
+	flush_work(&coredump->work);
+
+	if (!coredump->formatted)
+		return -ENODEV;
+
+	if (offset >= coredump->formatted_size)
+		return 0;
+
+	byte_copied = count < coredump->formatted_size - offset ? count :
+		      coredump->formatted_size - offset;
+	memcpy(buffer, coredump->formatted + offset, byte_copied);
+
+	return byte_copied;
+}
+
 static void amdgpu_devcoredump_free(void *data)
 {
+	struct amdgpu_coredump_info *coredump = data;
+
+	cancel_work_sync(&coredump->work);
+	coredump->adev->coredump_in_progress = false;
+	kvfree(coredump->formatted);
 	kfree(data);
 }
 
+static void amdgpu_devcoredump_deferred_work(struct work_struct *work)
+{
+	struct amdgpu_coredump_info *coredump = container_of(work, typeof(*coredump), work);
+
+	dev_coredumpm(coredump->adev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
+		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+
+	/* Do a one-time preparation of the coredump output because
+	 * repeatedly calling drm_coredump_printer is very slow.
+	 */
+	coredump->formatted_size =
+		amdgpu_devcoredump_format(NULL, AMDGPU_CORE_DUMP_SIZE_MAX, coredump);
+	coredump->formatted = kvzalloc(coredump->formatted_size, GFP_KERNEL);
+	if (!coredump->formatted)
+		return;
+	amdgpu_devcoredump_format(coredump->formatted, coredump->formatted_size, coredump);
+	coredump->adev->coredump_in_progress = false;
+}
+
 void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 		     bool vram_lost, struct amdgpu_job *job)
 {
@@ -335,10 +383,15 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 	struct amdgpu_coredump_info *coredump;
 	struct drm_sched_job *s_job;
 
+	if (adev->coredump_in_progress)
+		return;
+
 	coredump = kzalloc(sizeof(*coredump), GFP_NOWAIT);
 	if (!coredump)
 		return;
 
+	adev->coredump_in_progress = true;
+
 	coredump->skip_vram_check = skip_vram_check;
 	coredump->reset_vram_lost = vram_lost;
 
@@ -361,8 +414,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check,
 
 	ktime_get_ts64(&coredump->reset_time);
 
-	dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT,
-		      amdgpu_devcoredump_read, amdgpu_devcoredump_free);
+	/* Defer coredump formatting to a worker thread. */
+	INIT_WORK(&coredump->work, amdgpu_devcoredump_deferred_work);
+	queue_work(system_unbound_wq, &coredump->work);
 
 	drm_info(dev, "AMDGPU device coredump file has been created\n");
 	drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
index ef9772c6bcc9..4c37a852b74a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.h
@@ -35,9 +35,18 @@ struct amdgpu_coredump_info {
 	struct amdgpu_device		*adev;
 	struct amdgpu_task_info		reset_task_info;
 	struct timespec64		reset_time;
+
+	struct work_struct		work;
+
 	bool				skip_vram_check;
 	bool				reset_vram_lost;
 	struct amdgpu_ring		*ring;
+	/* Readable form of the coredump, generated once to speed up
+	 * reading it (see drm_coredump_printer's documentation).
+	 */
+	ssize_t				formatted_size;
+	char				*formatted;
+
 };
 
 #endif
-- 
2.43.0
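
Not part of the patch, for reference only: a minimal sketch of the
two-pass drm_coredump_printer pattern the change relies on (run the
printer once with a NULL buffer to measure the output, allocate once,
then run it again to fill the buffer, as amdgpu_devcoredump_format(NULL,
...) does above). The helpers format_dump() and build_dump_once() are
hypothetical names used only for illustration.

#include <drm/drm_print.h>
#include <linux/slab.h>

/* Emit the dump through a drm_coredump_printer.  When buffer is NULL
 * the printer only counts bytes, so the same function serves both the
 * sizing pass and the fill pass.
 */
static ssize_t format_dump(char *buffer, size_t count)
{
	struct drm_print_iterator iter = {
		.data	= buffer,
		.start	= 0,
		.remain	= count,
	};
	struct drm_printer p = drm_coredump_printer(&iter);

	drm_printf(&p, "**** Example dump section ****\n");
	/* ... more drm_printf() calls describing the device state ... */

	return count - iter.remain; /* bytes written (or that would be written) */
}

/* Size the output once, allocate once, fill once.  The returned buffer
 * must later be released with kvfree().
 */
static char *build_dump_once(ssize_t *size)
{
	char *buf;

	*size = format_dump(NULL, 256 * 1024 * 1024); /* sizing pass, capped */
	if (*size <= 0)
		return NULL;

	buf = kvzalloc(*size, GFP_KERNEL);
	if (!buf)
		return NULL;

	format_dump(buf, *size); /* fill pass */
	return buf;
}

Because the text is produced once up front, the devcoredump read
callback only has to memcpy the requested chunk out of the prebuilt
buffer, which is what removes the per-read formatting cost.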
