[PATCH] drm/amdgpu: Fix atomics on GFX12
If PCIe supports atomics, configure register to prevent DF from breaking atomics in separate load/store operations. Signed-off-by: David Belanger Reviewed-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h| 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 5 +++ drivers/gpu/drm/amd/amdgpu/df_v4_15.c | 45 +++ drivers/gpu/drm/amd/amdgpu/df_v4_15.h | 30 + drivers/gpu/drm/amd/amdgpu/soc24.c| 4 ++ .../amd/include/asic_reg/df/df_4_15_offset.h | 28 .../amd/include/asic_reg/df/df_4_15_sh_mask.h | 28 8 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.c create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index eddbb69a179f..ec099aadf334 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -106,7 +106,8 @@ amdgpu-y += \ df_v1_7.o \ df_v3_6.o \ df_v4_3.o \ - df_v4_6_2.o + df_v4_6_2.o \ + df_v4_15.o # add GMC block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 1538b2dbfff1..eb605e79ae0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -33,6 +33,7 @@ struct amdgpu_df_hash_status { struct amdgpu_df_funcs { void (*sw_init)(struct amdgpu_device *adev); void (*sw_fini)(struct amdgpu_device *adev); + void (*hw_init)(struct amdgpu_device *adev); void (*enable_broadcast_mode)(struct amdgpu_device *adev, bool enable); u32 (*get_fb_channel_number)(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index b241f61fe9c9..ac108fca64fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -37,6 +37,7 @@ #include "df_v3_6.h" #include "df_v4_3.h" #include "df_v4_6_2.h" +#include "df_v4_15.h" #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" @@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(4, 6, 2): adev->df.funcs = _v4_6_2_funcs; break; + case IP_VERSION(4, 15, 0): + case IP_VERSION(4, 15, 1): + adev->df.funcs = _v4_15_funcs; + break; default: break; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c new file mode 100644 index ..2a573e33908b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c @@ -0,0 +1,45 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "df_v4_15.h" + +#include "df/df_4_15_offset.h" +#include "df/df_4_15_sh_mask.h" + +static void df_v4_15_hw_init(struct amdgpu_device *adev) +{ + if (adev->have_atomics_support) { + uint32_t tmp; + uint32_t dis_lcl_proc = (1 << 1 | + 1 << 2 | + 1 << 13); + + tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1); + tmp |= (dis_lcl_proc << NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT); + WREG32_
[PATCH] drm/amdgpu: Restore uncache behaviour on GFX12
Always use MTYPE_UC if UNCACHED flag is specified. This makes kernarg region uncached and it restores usermode cache disable debug flag functionality. Do not set MTYPE_UC for COHERENT flag, on GFX12 coherence is handled by shader code. Signed-off-by: David Belanger --- drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 21 ++--- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 +--- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e..542225eb13b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -498,9 +498,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, uint64_t *flags) { struct amdgpu_bo *bo = mapping->bo_va->base.bo; - struct amdgpu_device *bo_adev; - bool coherent, is_system; - *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -516,25 +513,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (!bo) - return; - - if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | - AMDGPU_GEM_CREATE_UNCACHED)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); - - bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); - coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; - /* WA for HW bug */ - if (is_system || ((bo_adev != adev) && coherent)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); - + if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); } static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bd9c2921e0dc..7b671aefab01 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1248,13 +1248,7 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_node != node) - mapping_flags |= AMDGPU_VM_MTYPE_NC; - } else { - mapping_flags |= coherent ? - AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - } + mapping_flags |= AMDGPU_VM_MTYPE_NC; break; default: mapping_flags |= coherent ? -- 2.41.0
[PATCH v3] drm/amdkfd: Fixed kfd_process cleanup on module exit.
Handle case when module is unloaded (kfd_exit) before a process space (mm_struct) is released. v2: Fixed potential race conditions by removing all kfd_process from the process table first, then working on releasing the resources. v3: Fixed loop element access / synchronization. Fixed extra empty lines. Signed-off-by: David Belanger --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 1 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 75 +--- 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..aee2212e52f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -77,6 +77,7 @@ static int kfd_init(void) static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index bfa30d12406b..7e4d992e48b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev); int kfd_process_create_wq(void); void kfd_process_destroy_wq(void); +void kfd_cleanup_processes(void); struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *task); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ebabe92f7edb..5614ef2ac49e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } +static void kfd_process_notifier_release_internal(struct kfd_process *p) +{ + cancel_delayed_work_sync(>eviction_work); + cancel_delayed_work_sync(>restore_work); + + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + + mmu_notifier_put(>mmu_notifier); +} + static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { @@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(_processes_mutex); + /* +* Do early return if table is empty. +* +* This could potentially happen if this function is called concurrently +* by mmu_notifier and by kfd_cleanup_pocesses. +* +*/ + if (hash_empty(kfd_processes_table)) { + mutex_unlock(_processes_mutex); + return; + } hash_del_rcu(>kfd_processes); mutex_unlock(_processes_mutex); synchronize_srcu(_processes_srcu); - cancel_delayed_work_sync(>eviction_work); - cancel_delayed_work_sync(>restore_work); - - /* Indicate to other users that MM is no longer valid */ - p->mm = NULL; - - mmu_notifier_put(>mmu_notifier); + kfd_process_notifier_release_internal(p); } static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { @@ -1200,6 +1216,51 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .free_notifier = kfd_process_free_notifier, }; +void kfd_cleanup_processes(void) +{ + /* +* This code handles the case when driver is being unloaded before all +* mm_struct are released. We need to safely free the kfd_process and +* avoid race conditions with mmu_notifier that might try to free them. +* +*/ + + struct kfd_process *p; + struct hlist_node *p_temp; + unsigned int temp; + HLIST_HEAD(cleanup_list); + + /* +* Move all remaining kfd_process from the process table to a +* temp list for processing. Once done, callback from mmu_notifier +* release will not see the kfd_process in the table and do early return, +* avoiding double free issues. +*/ + mutex_lock(_processes_mutex); + hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) { + hash_del_rcu(>kfd_processes); + synchronize_srcu(_processes_srcu); + hlist_add_head(>kfd_processes, _list); + } + mutex_unlock(_processes_mutex); + + + /* +* Release resources for all outstanding kfd_process collected. +*/ + hlist_for_each_entry_safe(p, p_temp, _list, kfd_processes) + kfd_process_notifier_release_internal(p); + + /* +* Must be called after all mmu_notifier_put are done and before +
[PATCH v2] drm/amdkfd: Fixed kfd_process cleanup on module exit.
Handle case when module is unloaded (kfd_exit) before a process space (mm_struct) is released. v2: Fixed potential race conditions by removing all kfd_process from the process table first, then working on releasing the resources. Signed-off-by: David Belanger --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 80 +--- 2 files changed, 77 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..8ef4bd9e4f7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -26,6 +26,9 @@ #include "kfd_priv.h" #include "amdgpu_amdkfd.h" +void kfd_cleanup_processes(void); + + static int kfd_init(void) { int err; @@ -77,6 +80,7 @@ static int kfd_init(void) static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ebabe92f7edb..dd396a93a68d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1167,6 +1167,19 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier)); } + +static void kfd_process_notifier_release_internal(struct kfd_process *p) +{ + cancel_delayed_work_sync(>eviction_work); + cancel_delayed_work_sync(>restore_work); + + /* Indicate to other users that MM is no longer valid */ + p->mm = NULL; + + mmu_notifier_put(>mmu_notifier); +} + + static void kfd_process_notifier_release(struct mmu_notifier *mn, struct mm_struct *mm) { @@ -1181,25 +1194,78 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(_processes_mutex); + /* +* Do early return if p is not in the table. +* +* This could potentially happen if this function is called concurrently +* by mmu_notifier and by kfd_cleanup_pocesses. +* +*/ + if (!hash_hashed(>kfd_processes)) { + mutex_unlock(_processes_mutex); + return; + } hash_del_rcu(>kfd_processes); mutex_unlock(_processes_mutex); synchronize_srcu(_processes_srcu); - cancel_delayed_work_sync(>eviction_work); - cancel_delayed_work_sync(>restore_work); - - /* Indicate to other users that MM is no longer valid */ - p->mm = NULL; - - mmu_notifier_put(>mmu_notifier); + kfd_process_notifier_release_internal(p); } + static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .release = kfd_process_notifier_release, .alloc_notifier = kfd_process_alloc_notifier, .free_notifier = kfd_process_free_notifier, }; + +void kfd_cleanup_processes(void) +{ + /* +* This code handles the case when driver is being unloaded before all +* mm_struct are released. We need to safely free the kfd_process and +* avoid race conditions with mmu_notifier that might try to free them. +* +*/ + + struct kfd_process *p; + struct hlist_node *p_temp; + unsigned int temp; + HLIST_HEAD(cleanup_list); + + /* +* Move all remaining kfd_process from the process table to a +* temp list for processing. Once done, callback from mmu_notifier +* release will not see the kfd_process in the table and do early return, +* avoiding double free issues. +*/ + mutex_lock(_processes_mutex); + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + hash_del_rcu(>kfd_processes); + hlist_add_head(>kfd_processes, _list); + } + mutex_unlock(_processes_mutex); + synchronize_srcu(_processes_srcu); + + /* +* Release resources for all outstanding kfd_process collected. +*/ + hlist_for_each_entry_safe(p, p_temp, _list, kfd_processes) { + kfd_process_notifier_release_internal(p); + } + + /* +* Must be called after all mmu_notifier_put are done and before +* kfd_process_wq is released. +* +* Ensures that all outstanding free_notifier get called, triggering +* the release of the kfd_process struct. +*/ + mmu_notifier_synchronize(); +} + + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; -- 2.38.1
[PATCH] drm/amdkfd: Fixed kfd_process cleanup on module exit.
Handle case when module is unloaded (kfd_exit) before a process space (mm_struct) is released. Signed-off-by: David Belanger --- drivers/gpu/drm/amd/amdkfd/kfd_module.c | 4 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 2 files changed, 61 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c index 09b966dc3768..8ef4bd9e4f7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c @@ -26,6 +26,9 @@ #include "kfd_priv.h" #include "amdgpu_amdkfd.h" +void kfd_cleanup_processes(void); + + static int kfd_init(void) { int err; @@ -77,6 +80,7 @@ static int kfd_init(void) static void kfd_exit(void) { + kfd_cleanup_processes(); kfd_debugfs_fini(); kfd_process_destroy_wq(); kfd_procfs_shutdown(); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ebabe92f7edb..b5b28a32639d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1181,6 +1181,17 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, return; mutex_lock(_processes_mutex); + /* +* Do early return if p is not in the table. +* +* This could potentially happen if this function is called concurrently +* by mmu_notifier and by kfd_cleanup_pocesses. +* +*/ + if (!hash_hashed(>kfd_processes)) { + mutex_unlock(_processes_mutex); + return; + } hash_del_rcu(>kfd_processes); mutex_unlock(_processes_mutex); synchronize_srcu(_processes_srcu); @@ -1200,6 +1211,52 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = { .free_notifier = kfd_process_free_notifier, }; + +void kfd_cleanup_processes(void) +{ + struct kfd_process *p; + unsigned int temp; + + /* +* Iterate over remaining processes in table, calling notifier release +* to free up notifier and process resources. +* +* This code handles the case when driver is unloaded before all mm_struct +* are released. +*/ + int idx = srcu_read_lock(_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { + if (p) { + /* +* Obtain a reference on p to avoid a late mmu_notifier release +* call triggering freeing the process. +*/ + + kref_get(>ref); + + srcu_read_unlock(_processes_srcu, idx); + + kfd_process_notifier_release(>mmu_notifier, p->mm); + + kfd_unref_process(p); + + idx = srcu_read_lock(_processes_srcu); + } + } + srcu_read_unlock(_processes_srcu, idx); + + /* +* Must be called after all mmu_notifier_put are done and before +* kfd_process_wq is released. +* +* Ensures that all outstanding free_notifier gets called, triggering the release +* of the process. +*/ + mmu_notifier_synchronize(); +} + + static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; -- 2.38.1
[PATCH v3] drm/amdgpu: Enable SA software trap.
Enables support for software trap for MES >= 4. Adapted from implementation from Jay Cornwall. v2: Add IP version check in conditions. v3: Remove debugger code changes. Signed-off-by: Jay Cornwall Signed-off-by: David Belanger Reviewed-by: Felix Kuehling Acked-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 6 +- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm| 21 + 3 files changed, 413 insertions(+), 385 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b64cd46a159a..cbc506b958b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; - mes_add_queue_pkt.trap_en = 1; + + if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3 + mes_add_queue_pkt.trap_en = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, _add_queue_pkt, sizeof(mes_add_queue_pkt), diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 60a81649cf12..c7118843db05 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf88fffe, 0x877aff7f, 0x0400, 0x8f7a857a, 0x886d7a6d, 0xb97b02dc, - 0x8f7b997b, 0xb97a2a05, + 0x8f7b997b, 0xb97a3a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, @@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbefe037c, 0xbefc0370, 0xf4611c7a, 0xf800, 0x80708470, 0xbefc037e, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -1069,7 +1069,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb9f9f816, 0x876f7bff, 0xf800, 0x906f8b6f, 0xb9efa2c3, 0xb9f3f801, - 0xb96e2a05, 0x806e816e, + 0xb96e3a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, 0x8f6e8a6e, 0xb96f1e06, @@ -2114,7 +2114,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x007a, 0x7e000280, 0xbefe037a, 0xbeff037b, 0xb97b02dc, 0x8f7b997b, - 0xb97a2a05, 0x807a817a, + 0xb97a3a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, 0xb97b1e06, @@ -2157,7 +2157,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x0100, 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, - 0x705d0300, 0xb9702a05, + 0x705d0300, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, @@ -2189,7 +2189,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbefe03ff, 0x, 0xbeff0380, 0xe0704000, 0x705d0200, 0xbefe03c1, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -2475,7 +2475,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb9ef4803, 0x876f7bff, 0xf800, 0x906f8b6f, 0xb9efa2c3, 0xb9f3f801, - 0xb96e2a05, 0x806e816e, + 0xb96e3a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, 0x8f6e8a6e, 0xb96f1e06, @@ -2494,438 +2494,441 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f, 0xbf9f, 0xbf9f, 0x, }; - static const uint32_t cwsr_trap_gfx11_hex[] = { - 0xbfa1, 0xbfa0021b, + 0xbfa1, 0xbfa0021e, 0xb0804006, 0xb8f8f802, - 0x91788678, 0xb8fbf803, - 0x8b6eff78, 0x2000, - 0xbfa10009, 0x8b6eff6d, - 0x00ff, 0xbfa2001e, - 0x8b6eff7b, 0x0400, - 0xbfa20041, 0xbf830010, - 0xb8fbf803, 0xbfa0fffa, - 0x8b6eff7b, 0x0900, - 0xbfa20015, 0x8b6eff7b, - 0x71ff, 0xbfa10008, - 0x8b6fff7b, 0x7080, - 0xbfa10001, 0xbeee1287, - 0xb8eff801, 0x846e8c6e, - 0x8b6e6f6e, 0xbfa2000a, + 0x9178ff78, 0x00020006, + 0xb8fbf803, 0xbf0d9f6d, + 0xbfa20006, 0x8b6eff78, + 0x2000, 0xbfa10009, 0x8b6eff6d, 0x00ff, - 0xbfa20007, 0xb8eef801, - 0x8b
[PATCH] drm/amdgpu: Enable SA software trap.
Enables support for software trap for MES >= 4. Adapted from implementation from Jay Cornwall. v2: Add IP version check in conditions. Signed-off-by: Jay Cornwall Signed-off-by: David Belanger Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 6 +- .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +- .../amd/amdkfd/cwsr_trap_handler_gfx10.asm| 21 + .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c | 26 +- 4 files changed, 437 insertions(+), 387 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index b64cd46a159a..cbc506b958b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; - mes_add_queue_pkt.trap_en = 1; + + if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) && + (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) && + (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3 + mes_add_queue_pkt.trap_en = 1; return mes_v11_0_submit_pkt_and_poll_completion(mes, _add_queue_pkt, sizeof(mes_add_queue_pkt), diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 60a81649cf12..c7118843db05 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbf88fffe, 0x877aff7f, 0x0400, 0x8f7a857a, 0x886d7a6d, 0xb97b02dc, - 0x8f7b997b, 0xb97a2a05, + 0x8f7b997b, 0xb97a3a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, @@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xbefe037c, 0xbefc0370, 0xf4611c7a, 0xf800, 0x80708470, 0xbefc037e, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -1069,7 +1069,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = { 0xb9f9f816, 0x876f7bff, 0xf800, 0x906f8b6f, 0xb9efa2c3, 0xb9f3f801, - 0xb96e2a05, 0x806e816e, + 0xb96e3a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, 0x8f6e8a6e, 0xb96f1e06, @@ -2114,7 +2114,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x007a, 0x7e000280, 0xbefe037a, 0xbeff037b, 0xb97b02dc, 0x8f7b997b, - 0xb97a2a05, 0x807a817a, + 0xb97a3a05, 0x807a817a, 0xbf0d997b, 0xbf850002, 0x8f7a897a, 0xbf820001, 0x8f7a8a7a, 0xb97b1e06, @@ -2157,7 +2157,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0x0100, 0xe0704100, 0x705d0100, 0xe0704200, 0x705d0200, 0xe0704300, - 0x705d0300, 0xb9702a05, + 0x705d0300, 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, @@ -2189,7 +2189,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbefe03ff, 0x, 0xbeff0380, 0xe0704000, 0x705d0200, 0xbefe03c1, - 0xb9702a05, 0x80708170, + 0xb9703a05, 0x80708170, 0xbf0d9973, 0xbf850002, 0x8f708970, 0xbf820001, 0x8f708a70, 0xb97a1e06, @@ -2475,7 +2475,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xb9ef4803, 0x876f7bff, 0xf800, 0x906f8b6f, 0xb9efa2c3, 0xb9f3f801, - 0xb96e2a05, 0x806e816e, + 0xb96e3a05, 0x806e816e, 0xbf0d9972, 0xbf850002, 0x8f6e896e, 0xbf820001, 0x8f6e8a6e, 0xb96f1e06, @@ -2494,438 +2494,441 @@ static const uint32_t cwsr_trap_gfx10_hex[] = { 0xbf9f, 0xbf9f, 0xbf9f, 0x, }; - static const uint32_t cwsr_trap_gfx11_hex[] = { - 0xbfa1, 0xbfa0021b, + 0xbfa1, 0xbfa0021e, 0xb0804006, 0xb8f8f802, - 0x91788678, 0xb8fbf803, - 0x8b6eff78, 0x2000, - 0xbfa10009, 0x8b6eff6d, - 0x00ff, 0xbfa2001e, - 0x8b6eff7b, 0x0400, - 0xbfa20041, 0xbf830010, - 0xb8fbf803, 0xbfa0fffa, - 0x8b6eff7b, 0x0900, - 0xbfa20015, 0x8b6eff7b, - 0x71ff, 0xbfa10008, - 0x8b6fff7b, 0x7080, - 0xbfa10001, 0xbeee1287, - 0xb8eff801, 0x846e8c6e, - 0x8b6e6f6e, 0xbfa2000a, + 0x9178ff78, 0x00020006, + 0xb8fbf803, 0xbf0d9f6d, + 0xbfa20006, 0x8b6eff78, + 0x2000, 0xbfa10009, 0x8b6eff6d, 0x00ff, - 0xbfa20007, 0xb8eef801, - 0x8b