Am 2023-06-12 um 12:23 schrieb Mukul Joshi:
Update the invalid PTE flag setting with TF enabled.
This is to ensure, in addition to transitioning the
retry fault to a no-retry fault, it also causes the
wavefront to enter the trap handler. With the current
setting, the fault only transitions to a no-retry fault.
Additionally, have 2 sets of invalid PTE settings, one for
TF enabled, the other for TF disabled. The setting with
TF disabled, doesn't work with TF enabled.

Signed-off-by: Mukul Joshi <mukul.jo...@amd.com>
---
v1->v2:
- Update handling according to Christian's feedback.

  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h   |  7 +++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  6 ++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c |  3 +++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c     | 11 +++++++++++
  5 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6794edd1d2d2..e5c6b075fbbb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -152,6 +152,10 @@ struct amdgpu_gmc_funcs {
        void (*override_vm_pte_flags)(struct amdgpu_device *dev,
                                      struct amdgpu_vm *vm,
                                      uint64_t addr, uint64_t *flags);
+       /* update no-retry flags */
+       void (*update_vm_pte_noretry_flags)(struct amdgpu_device *dev,
+                                       uint64_t *flags);
+
        /* get the amount of memory used by the vbios for pre-OS console */
        unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
@@ -343,6 +347,9 @@ struct amdgpu_gmc {
  #define amdgpu_gmc_override_vm_pte_flags(adev, vm, addr, pte_flags)   \
        (adev)->gmc.gmc_funcs->override_vm_pte_flags                      \
                ((adev), (vm), (addr), (pte_flags))
+#define amdgpu_gmc_update_vm_pte_noretry_flags(adev, pte_flags)                
\
+       ((adev)->gmc.gmc_funcs->update_vm_pte_noretry_flags               \
+               ((adev), (pte_flags)))
  #define amdgpu_gmc_get_vbios_fb_size(adev) 
(adev)->gmc.gmc_funcs->get_vbios_fb_size((adev))
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1cb14ea18cd9..ff9db7e5c086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2583,7 +2583,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
                /* Intentionally setting invalid PTE flag
                 * combination to force a no-retry-fault
                 */
-               flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
+               flags = AMDGPU_VM_NORETRY_FLAGS;
                value = 0;
        } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
                /* Redirect the access to the dummy page */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..b81fcb962d8f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -84,7 +84,13 @@ struct amdgpu_mem_stats;
  /* PDE Block Fragment Size for VEGA10 */
  #define AMDGPU_PDE_BFS(a)     ((uint64_t)a << 59)
+/* Flag combination to set no-retry with TF disabled */
+#define AMDGPU_VM_NORETRY_FLAGS        (AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE 
| \
+                               AMDGPU_PTE_TF)
+/* Flag combination to set no-retry with TF enabled */
+#define AMDGPU_VM_NORETRY_FLAGS_TF (AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM | \
+                                  AMDGPU_PTE_PRT)
  /* For GFX9 */
  #define AMDGPU_PTE_MTYPE_VG10(a)      ((uint64_t)(a) << 57)
  #define AMDGPU_PTE_MTYPE_VG10_MASK    AMDGPU_PTE_MTYPE_VG10(3ULL)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..39f1650f6d00 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -804,6 +804,9 @@ static void amdgpu_vm_pte_update_flags(struct 
amdgpu_vm_update_params *params,
                flags |= AMDGPU_PTE_EXECUTABLE;
        }
+ if (adev->gmc.translate_further && level == AMDGPU_VM_PTB)
+               amdgpu_gmc_update_vm_pte_noretry_flags(adev, &flags);

Don't you need a check that ((adev)->gmc.gmc_funcs->update_vm_pte_noretry_flags is not NULL? But adding a new callback for this may be overkill. Since the AMDGPU_VM_NORETRY_FLAGS(_TF) are defined in a non-HW-specific header file, you can probably implement the application of those flags in amdgpu_vm_pte_update_flags directly.

Regards,
  Felix


+
        /* APUs mapping system memory may need different MTYPEs on different
         * NUMA nodes. Only do this for contiguous ranges that can be assumed
         * to be on the same NUMA node.
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3ed286b72cae..aea8e80c3419 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1307,6 +1307,16 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device 
*adev,
                                             mapping, flags);
  }
+static void gmc_v9_0_update_vm_pte_noretry_flags(struct amdgpu_device *adev,
+                                          uint64_t *flags)
+{
+       /* Update no retry flags when TF is enabled */
+       if ((*flags & AMDGPU_VM_NORETRY_FLAGS) == AMDGPU_VM_NORETRY_FLAGS) {
+               *flags &= ~AMDGPU_VM_NORETRY_FLAGS;
+               *flags |= AMDGPU_VM_NORETRY_FLAGS_TF;
+       }
+}
+
  static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev,
                                           struct amdgpu_vm *vm,
                                           uint64_t addr, uint64_t *flags)
@@ -1445,6 +1455,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = 
{
        .get_vm_pde = gmc_v9_0_get_vm_pde,
        .get_vm_pte = gmc_v9_0_get_vm_pte,
        .override_vm_pte_flags = gmc_v9_0_override_vm_pte_flags,
+       .update_vm_pte_noretry_flags = gmc_v9_0_update_vm_pte_noretry_flags,
        .get_vbios_fb_size = gmc_v9_0_get_vbios_fb_size,
        .query_mem_partition_mode = &gmc_v9_0_query_memory_partition,
  };

Reply via email to