[PATCH] drm/amdgpu: Fix atomics on GFX12

2024-07-12 Thread David Belanger
If PCIe supports atomics, configure the register to prevent DF from
breaking atomics into separate load/store operations.

Signed-off-by: David Belanger 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/Makefile   |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_df.h|  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c |  5 +++
 drivers/gpu/drm/amd/amdgpu/df_v4_15.c | 45 +++
 drivers/gpu/drm/amd/amdgpu/df_v4_15.h | 30 +
 drivers/gpu/drm/amd/amdgpu/soc24.c|  4 ++
 .../amd/include/asic_reg/df/df_4_15_offset.h  | 28 
 .../amd/include/asic_reg/df/df_4_15_sh_mask.h | 28 
 8 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.c
 create mode 100644 drivers/gpu/drm/amd/amdgpu/df_v4_15.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_offset.h
 create mode 100644 drivers/gpu/drm/amd/include/asic_reg/df/df_4_15_sh_mask.h

diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile 
b/drivers/gpu/drm/amd/amdgpu/Makefile
index eddbb69a179f..ec099aadf334 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -106,7 +106,8 @@ amdgpu-y += \
df_v1_7.o \
df_v3_6.o \
df_v4_3.o \
-   df_v4_6_2.o
+   df_v4_6_2.o \
+   df_v4_15.o
 
 # add GMC block
 amdgpu-y += \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
index 1538b2dbfff1..eb605e79ae0e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
@@ -33,6 +33,7 @@ struct amdgpu_df_hash_status {
 struct amdgpu_df_funcs {
void (*sw_init)(struct amdgpu_device *adev);
void (*sw_fini)(struct amdgpu_device *adev);
+   void (*hw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
  bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index b241f61fe9c9..ac108fca64fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -37,6 +37,7 @@
 #include "df_v3_6.h"
 #include "df_v4_3.h"
 #include "df_v4_6_2.h"
+#include "df_v4_15.h"
 #include "nbio_v6_1.h"
 #include "nbio_v7_0.h"
 #include "nbio_v7_4.h"
@@ -2803,6 +2804,10 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device 
*adev)
case IP_VERSION(4, 6, 2):
adev->df.funcs = &df_v4_6_2_funcs;
break;
+   case IP_VERSION(4, 15, 0):
+   case IP_VERSION(4, 15, 1):
+   adev->df.funcs = &df_v4_15_funcs;
+   break;
default:
break;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v4_15.c 
b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
new file mode 100644
index 000000000000..2a573e33908b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/df_v4_15.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2024 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include "amdgpu.h"
+#include "df_v4_15.h"
+
+#include "df/df_4_15_offset.h"
+#include "df/df_4_15_sh_mask.h"
+
+static void df_v4_15_hw_init(struct amdgpu_device *adev)
+{
+   if (adev->have_atomics_support) {
+   uint32_t tmp;
+   uint32_t dis_lcl_proc = (1 <<  1 |
+   1 <<  2 |
+   1 << 13);
+
+   tmp = RREG32_SOC15(DF, 0, regNCSConfigurationRegister1);
+   tmp |= (dis_lcl_proc << 
NCSConfigurationRegister1__DisIntAtomicsLclProcessing__SHIFT);
+   WREG32_

[PATCH] drm/amdgpu: Restore uncache behaviour on GFX12

2024-07-08 Thread David Belanger
Always use MTYPE_UC if UNCACHED flag is specified.

This makes kernarg region uncached and it restores
usermode cache disable debug flag functionality.

Do not set MTYPE_UC for COHERENT flag, on GFX12 coherence is handled by
shader code.

Signed-off-by: David Belanger 
---
 drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c | 21 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c   |  8 +---
 2 files changed, 3 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
index fd3ac483760e..542225eb13b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c
@@ -498,9 +498,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev,
 uint64_t *flags)
 {
struct amdgpu_bo *bo = mapping->bo_va->base.bo;
-   struct amdgpu_device *bo_adev;
-   bool coherent, is_system;
-
 
*flags &= ~AMDGPU_PTE_EXECUTABLE;
*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
@@ -516,25 +513,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device 
*adev,
*flags &= ~AMDGPU_PTE_VALID;
}
 
-   if (!bo)
-   return;
-
-   if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
-  AMDGPU_GEM_CREATE_UNCACHED))
-   *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
-
-   bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
-   coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
-   is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) ||
-   (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT);
-
if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
*flags |= AMDGPU_PTE_DCC;
 
-   /* WA for HW bug */
-   if (is_system || ((bo_adev != adev) && coherent))
-   *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC);
-
+   if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED)
+   *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC);
 }
 
 static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bd9c2921e0dc..7b671aefab01 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1248,13 +1248,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
break;
case IP_VERSION(12, 0, 0):
case IP_VERSION(12, 0, 1):
-   if (domain == SVM_RANGE_VRAM_DOMAIN) {
-   if (bo_node != node)
-   mapping_flags |= AMDGPU_VM_MTYPE_NC;
-   } else {
-   mapping_flags |= coherent ?
-   AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
-   }
+   mapping_flags |= AMDGPU_VM_MTYPE_NC;
break;
default:
mapping_flags |= coherent ?
-- 
2.41.0



[PATCH v3] drm/amdkfd: Fixed kfd_process cleanup on module exit.

2023-03-13 Thread David Belanger
Handle case when module is unloaded (kfd_exit) before a process space
(mm_struct) is released.

v2: Fixed potential race conditions by removing all kfd_process from
the process table first, then working on releasing the resources.

v3: Fixed loop element access / synchronization.  Fixed extra empty lines.

Signed-off-by: David Belanger 
---
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 75 +---
 3 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 09b966dc3768..aee2212e52f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -77,6 +77,7 @@ static int kfd_init(void)
 
 static void kfd_exit(void)
 {
+   kfd_cleanup_processes();
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_procfs_shutdown();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d12406b..7e4d992e48b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
 
 int kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
+void kfd_cleanup_processes(void);
 struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *task);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..5614ef2ac49e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct 
mmu_notifier *mn)
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
 }
 
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+   cancel_delayed_work_sync(&p->eviction_work);
+   cancel_delayed_work_sync(&p->restore_work);
+
+   /* Indicate to other users that MM is no longer valid */
+   p->mm = NULL;
+
+   mmu_notifier_put(&p->mmu_notifier);
+}
+
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
 {
@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
return;
 
mutex_lock(&kfd_processes_mutex);
+   /*
+* Do early return if table is empty.
+*
+* This could potentially happen if this function is called concurrently
+* by mmu_notifier and by kfd_cleanup_processes.
+*
+*/
+   if (hash_empty(kfd_processes_table)) {
+   mutex_unlock(&kfd_processes_mutex);
+   return;
+   }
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
 
-   cancel_delayed_work_sync(&p->eviction_work);
-   cancel_delayed_work_sync(&p->restore_work);
-
-   /* Indicate to other users that MM is no longer valid */
-   p->mm = NULL;
-
-   mmu_notifier_put(&p->mmu_notifier);
+   kfd_process_notifier_release_internal(p);
 }
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
@@ -1200,6 +1216,51 @@ static const struct mmu_notifier_ops 
kfd_process_mmu_notifier_ops = {
.free_notifier = kfd_process_free_notifier,
 };
 
+void kfd_cleanup_processes(void)
+{
+   /*
+* This code handles the case when driver is being unloaded before all
+* mm_struct are released.  We need to safely free the kfd_process and
+* avoid race conditions with mmu_notifier that might try to free them.
+*
+*/
+
+   struct kfd_process *p;
+   struct hlist_node *p_temp;
+   unsigned int temp;
+   HLIST_HEAD(cleanup_list);
+
+   /*
+* Move all remaining kfd_process from the process table to a
+* temp list for processing.   Once done, callback from mmu_notifier
+* release will not see the kfd_process in the table and do early 
return,
+* avoiding double free issues.
+*/
+   mutex_lock(&kfd_processes_mutex);
+   hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+   hash_del_rcu(&p->kfd_processes);
+   synchronize_srcu(&kfd_processes_srcu);
+   hlist_add_head(&p->kfd_processes, &cleanup_list);
+   }
+   mutex_unlock(&kfd_processes_mutex);
+
+
+   /*
+* Release resources for all outstanding kfd_process collected.
+*/
+   hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+   kfd_process_notifier_release_internal(p);
+
+   /*
+* Must be called after all mmu_notifier_put are done and before
+   

[PATCH v2] drm/amdkfd: Fixed kfd_process cleanup on module exit.

2023-03-08 Thread David Belanger
Handle case when module is unloaded (kfd_exit) before a process space
(mm_struct) is released.

v2: Fixed potential race conditions by removing all kfd_process from
the process table first, then working on releasing the resources.

Signed-off-by: David Belanger 
---
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  4 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 80 +---
 2 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 09b966dc3768..8ef4bd9e4f7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -26,6 +26,9 @@
 #include "kfd_priv.h"
 #include "amdgpu_amdkfd.h"
 
+void kfd_cleanup_processes(void);
+
+
 static int kfd_init(void)
 {
int err;
@@ -77,6 +80,7 @@ static int kfd_init(void)
 
 static void kfd_exit(void)
 {
+   kfd_cleanup_processes();
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_procfs_shutdown();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..dd396a93a68d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1167,6 +1167,19 @@ static void kfd_process_free_notifier(struct 
mmu_notifier *mn)
kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
 }
 
+
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+   cancel_delayed_work_sync(&p->eviction_work);
+   cancel_delayed_work_sync(&p->restore_work);
+
+   /* Indicate to other users that MM is no longer valid */
+   p->mm = NULL;
+
+   mmu_notifier_put(&p->mmu_notifier);
+}
+
+
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
struct mm_struct *mm)
 {
@@ -1181,25 +1194,78 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
return;
 
mutex_lock(&kfd_processes_mutex);
+   /*
+* Do early return if p is not in the table.
+*
+* This could potentially happen if this function is called concurrently
+* by mmu_notifier and by kfd_cleanup_processes.
+*
+*/
+   if (!hash_hashed(&p->kfd_processes)) {
+   mutex_unlock(&kfd_processes_mutex);
+   return;
+   }
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
 
-   cancel_delayed_work_sync(&p->eviction_work);
-   cancel_delayed_work_sync(&p->restore_work);
-
-   /* Indicate to other users that MM is no longer valid */
-   p->mm = NULL;
-
-   mmu_notifier_put(&p->mmu_notifier);
+   kfd_process_notifier_release_internal(p);
 }
 
+
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
.release = kfd_process_notifier_release,
.alloc_notifier = kfd_process_alloc_notifier,
.free_notifier = kfd_process_free_notifier,
 };
 
+
+void kfd_cleanup_processes(void)
+{
+   /*
+* This code handles the case when driver is being unloaded before all
+* mm_struct are released.  We need to safely free the kfd_process and
+* avoid race conditions with mmu_notifier that might try to free them.
+*
+*/
+
+   struct kfd_process *p;
+   struct hlist_node *p_temp;
+   unsigned int temp;
+   HLIST_HEAD(cleanup_list);
+
+   /*
+* Move all remaining kfd_process from the process table to a
+* temp list for processing.   Once done, callback from mmu_notifier
+* release will not see the kfd_process in the table and do early 
return,
+* avoiding double free issues.
+*/
+   mutex_lock(&kfd_processes_mutex);
+   hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+   hash_del_rcu(&p->kfd_processes);
+   hlist_add_head(&p->kfd_processes, &cleanup_list);
+   }
+   mutex_unlock(&kfd_processes_mutex);
+   synchronize_srcu(&kfd_processes_srcu);
+
+   /*
+* Release resources for all outstanding kfd_process collected.
+*/
+   hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes) {
+   kfd_process_notifier_release_internal(p);
+   }
+
+   /*
+* Must be called after all mmu_notifier_put are done and before
+* kfd_process_wq is released.
+*
+* Ensures that all outstanding free_notifier get called, triggering
+* the release of the kfd_process struct.
+*/
+   mmu_notifier_synchronize();
+}
+
+
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
unsigned long  offset;
-- 
2.38.1



[PATCH] drm/amdkfd: Fixed kfd_process cleanup on module exit.

2023-03-06 Thread David Belanger
Handle case when module is unloaded (kfd_exit) before a process space
(mm_struct) is released.

Signed-off-by: David Belanger 
---
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  4 ++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 57 
 2 files changed, 61 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 09b966dc3768..8ef4bd9e4f7d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -26,6 +26,9 @@
 #include "kfd_priv.h"
 #include "amdgpu_amdkfd.h"
 
+void kfd_cleanup_processes(void);
+
+
 static int kfd_init(void)
 {
int err;
@@ -77,6 +80,7 @@ static int kfd_init(void)
 
 static void kfd_exit(void)
 {
+   kfd_cleanup_processes();
kfd_debugfs_fini();
kfd_process_destroy_wq();
kfd_procfs_shutdown();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ebabe92f7edb..b5b28a32639d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1181,6 +1181,17 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
return;
 
mutex_lock(&kfd_processes_mutex);
+   /*
+* Do early return if p is not in the table.
+*
+* This could potentially happen if this function is called concurrently
+* by mmu_notifier and by kfd_cleanup_processes.
+*
+*/
+   if (!hash_hashed(&p->kfd_processes)) {
+   mutex_unlock(&kfd_processes_mutex);
+   return;
+   }
hash_del_rcu(&p->kfd_processes);
mutex_unlock(&kfd_processes_mutex);
synchronize_srcu(&kfd_processes_srcu);
@@ -1200,6 +1211,52 @@ static const struct mmu_notifier_ops 
kfd_process_mmu_notifier_ops = {
.free_notifier = kfd_process_free_notifier,
 };
 
+
+void kfd_cleanup_processes(void)
+{
+   struct kfd_process *p;
+   unsigned int temp;
+
+   /*
+* Iterate over remaining processes in table, calling notifier release
+* to free up notifier and process resources.
+*
+* This code handles the case when driver is unloaded before all 
mm_struct
+* are released.
+*/
+   int idx = srcu_read_lock(&kfd_processes_srcu);
+
+   hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
+   if (p) {
+   /*
+* Obtain a reference on p to avoid a late mmu_notifier 
release
+* call triggering freeing the process.
+*/
+
+   kref_get(&p->ref);
+
+   srcu_read_unlock(&kfd_processes_srcu, idx);
+
+   kfd_process_notifier_release(&p->mmu_notifier, p->mm);
+
+   kfd_unref_process(p);
+
+   idx = srcu_read_lock(&kfd_processes_srcu);
+   }
+   }
+   srcu_read_unlock(&kfd_processes_srcu, idx);
+
+   /*
+* Must be called after all mmu_notifier_put are done and before
+* kfd_process_wq is released.
+*
+* Ensures that all outstanding free_notifier gets called, triggering 
the release
+* of the process.
+*/
+   mmu_notifier_synchronize();
+}
+
+
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
unsigned long  offset;
-- 
2.38.1



[PATCH v3] drm/amdgpu: Enable SA software trap.

2022-09-22 Thread David Belanger
Enables support for software trap for MES >= 4.
Adapted from implementation from Jay Cornwall.

v2: Add IP version check in conditions.
v3: Remove debugger code changes.

Signed-off-by: Jay Cornwall 
Signed-off-by: David Belanger 
Reviewed-by: Felix Kuehling 
Acked-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|   6 +-
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +-
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|  21 +
 3 files changed, 413 insertions(+), 385 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index b64cd46a159a..cbc506b958b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
-   mes_add_queue_pkt.trap_en = 1;
+
+   if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&
+ (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
+ (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3))))
+   mes_add_queue_pkt.trap_en = 1;
 
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 60a81649cf12..c7118843db05 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf88fffe, 0x877aff7f,
0x0400, 0x8f7a857a,
0x886d7a6d, 0xb97b02dc,
-   0x8f7b997b, 0xb97a2a05,
+   0x8f7b997b, 0xb97a3a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
@@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf800,
0x80708470, 0xbefc037e,
-   0xb9702a05, 0x80708170,
+   0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -1069,7 +1069,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb9f9f816, 0x876f7bff,
0xf800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
-   0xb96e2a05, 0x806e816e,
+   0xb96e3a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0xb96f1e06,
@@ -2114,7 +2114,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x007a, 0x7e000280,
0xbefe037a, 0xbeff037b,
0xb97b02dc, 0x8f7b997b,
-   0xb97a2a05, 0x807a817a,
+   0xb97a3a05, 0x807a817a,
0xbf0d997b, 0xbf850002,
0x8f7a897a, 0xbf820001,
0x8f7a8a7a, 0xb97b1e06,
@@ -2157,7 +2157,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0100, 0xe0704100,
0x705d0100, 0xe0704200,
0x705d0200, 0xe0704300,
-   0x705d0300, 0xb9702a05,
+   0x705d0300, 0xb9703a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
@@ -2189,7 +2189,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbefe03ff, 0x,
0xbeff0380, 0xe0704000,
0x705d0200, 0xbefe03c1,
-   0xb9702a05, 0x80708170,
+   0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -2475,7 +2475,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb9ef4803, 0x876f7bff,
0xf800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
-   0xb96e2a05, 0x806e816e,
+   0xb96e3a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0xb96f1e06,
@@ -2494,438 +2494,441 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f, 0xbf9f,
0xbf9f, 0x,
 };
-
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-   0xbfa1, 0xbfa0021b,
+   0xbfa1, 0xbfa0021e,
0xb0804006, 0xb8f8f802,
-   0x91788678, 0xb8fbf803,
-   0x8b6eff78, 0x2000,
-   0xbfa10009, 0x8b6eff6d,
-   0x00ff, 0xbfa2001e,
-   0x8b6eff7b, 0x0400,
-   0xbfa20041, 0xbf830010,
-   0xb8fbf803, 0xbfa0fffa,
-   0x8b6eff7b, 0x0900,
-   0xbfa20015, 0x8b6eff7b,
-   0x71ff, 0xbfa10008,
-   0x8b6fff7b, 0x7080,
-   0xbfa10001, 0xbeee1287,
-   0xb8eff801, 0x846e8c6e,
-   0x8b6e6f6e, 0xbfa2000a,
+   0x9178ff78, 0x00020006,
+   0xb8fbf803, 0xbf0d9f6d,
+   0xbfa20006, 0x8b6eff78,
+   0x2000, 0xbfa10009,
0x8b6eff6d, 0x00ff,
-   0xbfa20007, 0xb8eef801,
-   0x8b

[PATCH] drm/amdgpu: Enable SA software trap.

2022-09-22 Thread David Belanger
Enables support for software trap for MES >= 4.
Adapted from implementation from Jay Cornwall.

v2: Add IP version check in conditions.

Signed-off-by: Jay Cornwall 
Signed-off-by: David Belanger 
Reviewed-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|   6 +-
 .../gpu/drm/amd/amdkfd/cwsr_trap_handler.h| 771 +-
 .../amd/amdkfd/cwsr_trap_handler_gfx10.asm|  21 +
 .../gpu/drm/amd/amdkfd/kfd_int_process_v11.c  |  26 +-
 4 files changed, 437 insertions(+), 387 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index b64cd46a159a..cbc506b958b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -185,7 +185,11 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
-   mes_add_queue_pkt.trap_en = 1;
+
+   if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&
+ (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
+ (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3))))
+   mes_add_queue_pkt.trap_en = 1;
 
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h 
b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 60a81649cf12..c7118843db05 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -742,7 +742,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbf88fffe, 0x877aff7f,
0x0400, 0x8f7a857a,
0x886d7a6d, 0xb97b02dc,
-   0x8f7b997b, 0xb97a2a05,
+   0x8f7b997b, 0xb97a3a05,
0x807a817a, 0xbf0d997b,
0xbf850002, 0x8f7a897a,
0xbf820001, 0x8f7a8a7a,
@@ -819,7 +819,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xbefe037c, 0xbefc0370,
0xf4611c7a, 0xf800,
0x80708470, 0xbefc037e,
-   0xb9702a05, 0x80708170,
+   0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -1069,7 +1069,7 @@ static const uint32_t cwsr_trap_nv1x_hex[] = {
0xb9f9f816, 0x876f7bff,
0xf800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
-   0xb96e2a05, 0x806e816e,
+   0xb96e3a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0xb96f1e06,
@@ -2114,7 +2114,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x007a, 0x7e000280,
0xbefe037a, 0xbeff037b,
0xb97b02dc, 0x8f7b997b,
-   0xb97a2a05, 0x807a817a,
+   0xb97a3a05, 0x807a817a,
0xbf0d997b, 0xbf850002,
0x8f7a897a, 0xbf820001,
0x8f7a8a7a, 0xb97b1e06,
@@ -2157,7 +2157,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0x0100, 0xe0704100,
0x705d0100, 0xe0704200,
0x705d0200, 0xe0704300,
-   0x705d0300, 0xb9702a05,
+   0x705d0300, 0xb9703a05,
0x80708170, 0xbf0d9973,
0xbf850002, 0x8f708970,
0xbf820001, 0x8f708a70,
@@ -2189,7 +2189,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbefe03ff, 0x,
0xbeff0380, 0xe0704000,
0x705d0200, 0xbefe03c1,
-   0xb9702a05, 0x80708170,
+   0xb9703a05, 0x80708170,
0xbf0d9973, 0xbf850002,
0x8f708970, 0xbf820001,
0x8f708a70, 0xb97a1e06,
@@ -2475,7 +2475,7 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xb9ef4803, 0x876f7bff,
0xf800, 0x906f8b6f,
0xb9efa2c3, 0xb9f3f801,
-   0xb96e2a05, 0x806e816e,
+   0xb96e3a05, 0x806e816e,
0xbf0d9972, 0xbf850002,
0x8f6e896e, 0xbf820001,
0x8f6e8a6e, 0xb96f1e06,
@@ -2494,438 +2494,441 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
0xbf9f, 0xbf9f,
0xbf9f, 0x,
 };
-
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-   0xbfa1, 0xbfa0021b,
+   0xbfa1, 0xbfa0021e,
0xb0804006, 0xb8f8f802,
-   0x91788678, 0xb8fbf803,
-   0x8b6eff78, 0x2000,
-   0xbfa10009, 0x8b6eff6d,
-   0x00ff, 0xbfa2001e,
-   0x8b6eff7b, 0x0400,
-   0xbfa20041, 0xbf830010,
-   0xb8fbf803, 0xbfa0fffa,
-   0x8b6eff7b, 0x0900,
-   0xbfa20015, 0x8b6eff7b,
-   0x71ff, 0xbfa10008,
-   0x8b6fff7b, 0x7080,
-   0xbfa10001, 0xbeee1287,
-   0xb8eff801, 0x846e8c6e,
-   0x8b6e6f6e, 0xbfa2000a,
+   0x9178ff78, 0x00020006,
+   0xb8fbf803, 0xbf0d9f6d,
+   0xbfa20006, 0x8b6eff78,
+   0x2000, 0xbfa10009,
0x8b6eff6d, 0x00ff,
-   0xbfa20007, 0xb8eef801,
-   0x8b