Re: [PATCH v3 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2023-03-20 Thread Marek Olšák
I think we should do it differently because this interface will be mostly
unused by open source userspace in its current form.

Let's set the workload hint in drm_amdgpu_ctx_in::flags, and that will be
immutable for the lifetime of the context. No other interface is needed.

Marek

On Mon, Sep 26, 2022 at 5:41 PM Shashank Sharma 
wrote:

> Allow the user to specify a workload hint to the kernel.
> We can use these to tweak the dpm heuristics to better match
> the workload for improved performance.
>
> V3: Create only set() workload UAPI (Christian)
>
> Signed-off-by: Alex Deucher 
> Signed-off-by: Shashank Sharma 
> ---
>  include/uapi/drm/amdgpu_drm.h | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index c2c9c674a223..23d354242699 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -212,6 +212,7 @@ union drm_amdgpu_bo_list {
>  #define AMDGPU_CTX_OP_QUERY_STATE2 4
>  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE5
>  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE6
> +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 7
>
>  /* GPU reset status */
>  #define AMDGPU_CTX_NO_RESET0
> @@ -252,6 +253,17 @@ union drm_amdgpu_bo_list {
>  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
>  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
>
> +/* GPU workload hints, flag bits 8-15 */
> +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
> +#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 <<
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +#define AMDGPU_CTX_WORKLOAD_HINT_MAX  AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
> +#define AMDGPU_CTX_WORKLOAD_INDEX(n)  (n >>
> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
> +
>  struct drm_amdgpu_ctx_in {
> /** AMDGPU_CTX_OP_* */
> __u32   op;
> @@ -281,6 +293,11 @@ union drm_amdgpu_ctx_out {
> __u32   flags;
> __u32   _pad;
> } pstate;
> +
> +   struct {
> +   __u32   flags;
> +   __u32   _pad;
> +   } workload;
>  };
>
>  union drm_amdgpu_ctx {
> --
> 2.34.1
>
>


[PATCH 2/3] drm/amdgpu: reinit mes ip block during reset on SRIOV

2023-03-20 Thread YiPeng Chai
Reinit mes ip block during reset on SRIOV.

Signed-off-by: YiPeng Chai 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2ffbaa1a4ce2..d74d05802566 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3147,6 +3147,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct 
amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_DCE,
AMD_IP_BLOCK_TYPE_GFX,
AMD_IP_BLOCK_TYPE_SDMA,
+   AMD_IP_BLOCK_TYPE_MES,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
AMD_IP_BLOCK_TYPE_VCN
-- 
2.34.1



[PATCH 3/3] drm/amdgpu: resume ras for gfx v11_0_3 during reset on SRIOV

2023-03-20 Thread YiPeng Chai
Gfx v11_0_3 supports ras on SRIOV, so need to resume ras
during reset.

Signed-off-by: YiPeng Chai 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index d74d05802566..14d756caf839 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5313,8 +5313,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (r)
adev->asic_reset_res = r;
 
-   /* Aldebaran supports ras in SRIOV, so need resume ras during 
reset */
-   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
+   /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need 
resume ras during reset */
+   if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
+   adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3))
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
-- 
2.34.1



[PATCH 1/3] drm/amdgpu: enable ras for mp0 v13_0_10 on SRIOV

2023-03-20 Thread YiPeng Chai
Enable ras for mp0 v13_0_10 on SRIOV.

Signed-off-by: YiPeng Chai 
Reviewed-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 63dfcc98152d..94a3deb994ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -2340,6 +2340,7 @@ static bool amdgpu_ras_asic_supported(struct 
amdgpu_device *adev)
if (amdgpu_sriov_vf(adev)) {
switch (adev->ip_versions[MP0_HWIP][0]) {
case IP_VERSION(13, 0, 2):
+   case IP_VERSION(13, 0, 10):
return true;
default:
return false;
-- 
2.34.1



RE: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Quan, Evan
[AMD Official Use Only - General]

OK, I see. Thanks for the explanations.

BR
Evan
> -Original Message-
> From: Koenig, Christian 
> Sent: Tuesday, March 21, 2023 1:32 AM
> To: Kim, Jonathan ; Christian König
> ; Quan, Evan ;
> amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix 
> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access
> performance using sdma
> 
> Ah, yes! GART doesn't need to be read to make a GTT allocation.
> 
> When GART becomes ready it will be filled with all the buffers which were
> allocated before it was ready.
> 
> So this is perfectly fine.
> 
> Thanks,
> Christian.
> 
> Am 20.03.23 um 18:24 schrieb Kim, Jonathan:
> > [Public]
> >
> > This was a long time ago but I think we agreed allocation was ok before
> GART was ready.
> > IIRC, there was also some mentioned related scenario where APUs needed
> to work without VRAM but allocations were required (but I don't know the
> details regarding that).
> > I vaguely remember the requirement for GART readiness for the bounce
> buffer allocation caused some problems elsewhere.
> > Are there problems observed with the bounce buffer being allocated
> without GART readiness?
> >
> > Thanks,
> >
> > Jon
> >> -Original Message-
> >> From: Christian König 
> >> Sent: Monday, March 20, 2023 1:02 PM
> >> To: Quan, Evan ; Kim, Jonathan
> >> ; amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix ; Koenig, Christian
> >> 
> >> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access
> >> performance using sdma
> >>
> >> Caution: This message originated from an External Source. Use proper
> >> caution when opening attachments, clicking links, or responding.
> >>
> >>
> >> I don't think so. Have we recently re-ordered something here?
> >>
> >> Christian.
> >>
> >> Am 20.03.23 um 08:05 schrieb Quan, Evan:
> >>> [AMD Official Use Only - General]
> >>>
> >>> I happened to find the sdma_access_bo allocation from GTT seems
> >> performing before gart is ready.
> >>> That makes the "amdgpu_gart_map" is skipped since adev->gart.ptr is
> >>> still
> >> NULL.
> >>> Is that done intentionally ?
> >>>
> >>> Evan
>  -Original Message-
>  From: amd-gfx  On Behalf
> Of
>  Jonathan Kim
>  Sent: Wednesday, January 5, 2022 3:12 AM
>  To: amd-gfx@lists.freedesktop.org
>  Cc: Kuehling, Felix ; Kim, Jonathan
>  ; Koenig, Christian
> >> 
>  Subject: [PATCH] drm/amdgpu: improve debug VRAM access
> performance
>  using sdma
> 
>  For better performance during VRAM access for debugged processes,
>  do read/write copies over SDMA.
> 
>  In order to fulfill post mortem debugging on a broken device,
>  fallback to stable MMIO access when gpu recovery is disabled or
>  when job
> >> submission
>  time outs are set to max.  Failed SDMA access should automatically
>  fall back to MMIO access.
> 
>  Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
>  page-table updates and TLB flushes on access.
> 
>  Signed-off-by: Jonathan Kim 
>  ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
>  +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
> 2 files changed, 82 insertions(+), 1 deletion(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>  b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>  index 367abed1d6e6..512df4c09772 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>  @@ -48,6 +48,7 @@
> #include 
> 
> #include 
>  +#include 
> 
> #include "amdgpu.h"
> #include "amdgpu_object.h"
>  @@ -1429,6 +1430,70 @@ static void
> >> amdgpu_ttm_vram_mm_access(struct
>  amdgpu_device *adev, loff_t pos,
>    }
> }
> 
>  +static int amdgpu_ttm_access_memory_sdma(struct
> ttm_buffer_object
>  *bo,
>  +unsigned long offset, void
>  + *buf, int
>  len, int write)
>  +{
>  +struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
>  +struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
>  +struct amdgpu_job *job;
>  +struct dma_fence *fence;
>  +uint64_t src_addr, dst_addr;
>  +unsigned int num_dw;
>  +int r, idx;
>  +
>  +if (len != PAGE_SIZE)
>  +return -EINVAL;
>  +
>  +if (!adev->mman.sdma_access_ptr)
>  +return -EACCES;
>  +
>  +r = drm_dev_enter(adev_to_drm(adev), &idx);
>  +if (r)
>  +return r;
>  +
>  +if (write)
>  +memcpy(adev->mman.sdma_access_ptr, buf, len);
>  +
>  +num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
>  +r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
>  AMDGPU_IB_POOL_DELAYED, &job);
>  +if (r)
>  +goto out;
>  +
>  +src

Re: [PATCH 07/11] drm/amdgpu: add UAPI to query GFX shadow sizes

2023-03-20 Thread Marek Olšák
On Mon, Mar 20, 2023 at 1:38 PM Alex Deucher 
wrote:

> Add UAPI to query the GFX shadow buffer requirements
> for preemption on GFX11.  UMDs need to specify the shadow
> areas for preemption.
>
> Signed-off-by: Alex Deucher 
> ---
>  include/uapi/drm/amdgpu_drm.h | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 3d9474af6566..19a806145371 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -886,6 +886,7 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> #define AMDGPU_INFO_VIDEO_CAPS_DECODE   0
> /* Subquery id: Encode */
> #define AMDGPU_INFO_VIDEO_CAPS_ENCODE   1
> +#define AMDGPU_INFO_CP_GFX_SHADOW_SIZE 0x22
>

Can you put this into the device structure instead? Let's minimize the
number of kernel queries as much as possible.

Thanks,
Marek


Re: [PATCH 19/32] drm/amdkfd: add runtime enable operation

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

The debugger can attach to a process prior to HSA enablement (i.e.
inferior is spawned by the debugger and attached to immediately before
target process has been enabled for HSA dispatches) or it
can attach to a running target that is already HSA enabled.  Either
way, the debugger needs to know the enablement status to know when
it can inspect queues.

For the scenario where the debugger spawns the target process,
it will have to wait for ROCr's runtime enable request from the target.
The runtime enable request will be able to see that its process has been
debug attached.  ROCr raises an EC_PROCESS_RUNTIME signal to the
debugger then blocks the target process while waiting the debugger's
response. Once the debugger has received the runtime signal, it will
unblock the target process.

For the scenario where the debugger attaches to a running target
process, ROCr will set the target process' runtime status as enabled so
that on an attach request, the debugger will be able to see this
status and will continue with debug enablement as normal.

A secondary requirement is to conditionally enable the trap temporaries only
if the user requests it (env var HSA_ENABLE_DEBUG=1) or if the debugger
attaches with HSA runtime enabled.  This is because setting up the trap
temporaries incurs a performance overhead that is unacceptable for
microbench performance in normal mode for certain customers.

In the scenario where the debugger spawns the target process, when ROCr
detects that the debugger has attached during the runtime enable
request, it will enable the trap temporaries before it blocks the target
process while waiting for the debugger to respond.

In the scenario where the debugger attaches to a running target process,
it will enable the trap temporaries itself.

Finally, there is an additional restriction that is required to be
enforced with runtime enable and HW debug mode setting. The debugger must
first ensure that HW debug mode has been enabled before permitting HW debug
mode operations.

With single process debug devices, allowing the debugger to set debug
HW modes prior to trap activation means that debug HW mode setting can
occur before the KFD has reserved the debug VMID (0xf) from the hardware
scheduler's VMID allocation resource pool.  This can result in the
hardware scheduler assigning VMID 0xf to a non-debugged process and
having that process inherit debug HW mode settings intended for the
debugged target process instead, which is both incorrect and potentially
fatal for normal mode operation.

With multi process debug devices, allowing the debugger to set debug
HW modes prior to trap activation means that non-debugged processes
migrating to a new VMID could inherit unintended debug settings.

All debug operations that touch HW settings must require trap activation
where trap activation is triggered by both debug attach and runtime
enablement (target has KFD opened and is ready to dispatch work).

v2: fix up hierarchy of semantics in description.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 150 ++-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c   |   6 +-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h   |   4 +
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   1 +
  4 files changed, 157 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 09fe8576dc8c..46f9d453dc5e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2654,11 +2654,147 @@ static int kfd_ioctl_criu(struct file *filep, struct 
kfd_process *p, void *data)
return ret;
  }
  
-static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data)

+static int runtime_enable(struct kfd_process *p, uint64_t r_debug,
+   bool enable_ttmp_setup)
  {
+   int i = 0, ret = 0;
+
+   if (p->is_runtime_retry)
+   goto retry;
+
+   if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED)
+   return -EBUSY;
+
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_process_device *pdd = p->pdds[i];
+
+   if (pdd->qpd.queue_count)
+   return -EEXIST;
+   }
+
+   p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED;
+   p->runtime_info.r_debug = r_debug;
+   p->runtime_info.ttmp_setup = enable_ttmp_setup;
+
+   if (p->runtime_info.ttmp_setup) {
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_process_device *pdd = p->pdds[i];
+
+   if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) {
+   amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+   pdd->dev->kfd2kgd->enable_debug_trap(
+   pdd->dev->adev,
+  

Re: [PATCH 07/11] drm/amdgpu: add UAPI to query GFX shadow sizes

2023-03-20 Thread Marek Olšák
On Mon, Mar 20, 2023 at 1:38 PM Alex Deucher 
wrote:

> Add UAPI to query the GFX shadow buffer requirements
> for preemption on GFX11.  UMDs need to specify the shadow
> areas for preemption.
>
> Signed-off-by: Alex Deucher 
> ---
>  include/uapi/drm/amdgpu_drm.h | 10 ++
>  1 file changed, 10 insertions(+)
>
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index 3d9474af6566..19a806145371 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -886,6 +886,7 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
> #define AMDGPU_INFO_VIDEO_CAPS_DECODE   0
> /* Subquery id: Encode */
> #define AMDGPU_INFO_VIDEO_CAPS_ENCODE   1
> +#define AMDGPU_INFO_CP_GFX_SHADOW_SIZE 0x22
>
>  #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
>  #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
> @@ -1203,6 +1204,15 @@ struct drm_amdgpu_info_video_caps {
> struct drm_amdgpu_info_video_codec_info
> codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
>  };
>
> +struct drm_amdgpu_info_cp_gfx_shadow_size {
> +   __u32 shadow_size;
> +   __u32 shadow_alignment;
> +   __u32 csa_size;
> +   __u32 csa_alignment;
> +   __u32 gds_size;
> +   __u32 gds_alignment;
>

Can you document the fields? What is CSA? Also, why is GDS there when the
hw deprecated it and replaced it with GDS registers?

Thanks,
Marek


Re: [PATCH 18/32] drm/amdkfd: add send exception operation

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

Add a debug operation that allows the debugger to send an exception
directly to runtime through a payload address.

For memory violations, normal vmfault signals will be applied to
notify runtime instead after passing in the saved exception data
when a memory violation was raised to the debugger.

For runtime exceptions, this will unblock the runtime enable
function which will be explained and implemented in a follow up
patch.

Signed-off-by: Jonathan Kim 
---
  .../gpu/drm/amd/amdkfd/cik_event_interrupt.c  |  4 +-
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  5 ++
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 44 
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  5 ++
  drivers/gpu/drm/amd/amdkfd/kfd_events.c   |  3 +-
  .../gpu/drm/amd/amdkfd/kfd_int_process_v9.c   |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 +-
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 71 ++-
  8 files changed, 135 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c 
b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index 5c8023cba196..62a38cd820fc 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -118,9 +118,9 @@ static void cik_event_interrupt_wq(struct kfd_dev *dev,
return;
  
  		if (info.vmid == vmid)

-   kfd_signal_vm_fault_event(dev, pasid, &info);
+   kfd_signal_vm_fault_event(dev, pasid, &info, NULL);
else
-   kfd_signal_vm_fault_event(dev, pasid, NULL);
+   kfd_signal_vm_fault_event(dev, pasid, NULL, NULL);
}
  }
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

index 628178126d3b..09fe8576dc8c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -2738,6 +2738,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, 
struct kfd_process *p, v
r = kfd_dbg_trap_disable(target);
break;
case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT:
+   r = kfd_dbg_send_exception_to_runtime(target,
+   args->send_runtime_event.gpu_id,
+   args->send_runtime_event.queue_id,
+   args->send_runtime_event.exception_mask);
+   break;
case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED:
case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE:
case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index fcd064b13f6a..4174b479ea6f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -125,6 +125,49 @@ bool kfd_dbg_ev_raise(uint64_t event_mask,
return is_subscribed;
  }
  
+int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,

+   unsigned int dev_id,
+   unsigned int queue_id,
+   uint64_t error_reason)
+{
+   if (error_reason & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+   struct kfd_process_device *pdd = NULL;
+   struct kfd_hsa_memory_exception_data *data;
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++) {
+   if (p->pdds[i]->dev->id == dev_id) {
+   pdd = p->pdds[i];
+   break;
+   }
+   }
+
+   if (!pdd)
+   return -ENODEV;
+
+   data = (struct kfd_hsa_memory_exception_data *)
+   pdd->vm_fault_exc_data;
+
+   kfd_dqm_evict_pasid(pdd->dev->dqm, p->pasid);
+   kfd_signal_vm_fault_event(pdd->dev, p->pasid, NULL, data);
+   error_reason &= ~KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION);
+   }
+
+   if (error_reason & (KFD_EC_MASK(EC_PROCESS_RUNTIME))) {
+   /*
+* block should only happen after the debugger receives runtime
+* enable notice.
+*/
+   up(&p->runtime_enable_sema);
+   error_reason &= ~KFD_EC_MASK(EC_PROCESS_RUNTIME);
+   }
+
+   if (error_reason)
+   return kfd_send_exception_to_runtime(p, queue_id, error_reason);
+
+   return 0;
+}
+
  static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
  {
struct mqd_update_info minfo = {0};
@@ -175,6 +218,7 @@ static int kfd_dbg_set_workaround(struct kfd_process 
*target, bool enable)
}
  
  	return r;

+}


Ah, here you're fixing up the mistake from the last patch. Hint: An easy 
way to compile-test every patch in a large patch series is 

Re: [PATCH 17/32] drm/amdkfd: add raise exception event function

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

Exception events can be generated from interrupts or queue activity.

The raise event function will save exception status of a queue, device
or process then notify the debugger of the status change by writing to
a debugger polled file descriptor that the debugger provides during
debug attach.

For memory violation exceptions, extra exception data will be saved.

The debugger will be able to query the saved exception states by query
operation that will be provided by follow up patches.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 91 +-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h |  5 ++
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  7 ++
  3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 659dfc7411fe..fcd064b13f6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -38,6 +38,93 @@ void debug_event_write_work_handler(struct work_struct *work)
kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
  }
  
+/* update process/device/queue exception status, write to descriptor

+ * only if exception_status is enabled.
+ */
+bool kfd_dbg_ev_raise(uint64_t event_mask,
+   struct kfd_process *process, struct kfd_dev *dev,
+   unsigned int source_id, bool use_worker,
+   void *exception_data, size_t exception_data_size)
+{
+   struct process_queue_manager *pqm;
+   struct process_queue_node *pqn;
+   int i;
+   static const char write_data = '.';
+   loff_t pos = 0;
+   bool is_subscribed = true;
+
+   if (!(process && process->debug_trap_enabled))
+   return false;
+
+   mutex_lock(&process->event_mutex);
+
+   if (event_mask & KFD_EC_MASK_DEVICE) {
+   for (i = 0; i < process->n_pdds; i++) {
+   struct kfd_process_device *pdd = process->pdds[i];
+
+   if (pdd->dev != dev)
+   continue;
+
+   pdd->exception_status |= event_mask & 
KFD_EC_MASK_DEVICE;
+
+   if (event_mask & 
KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+   if (!pdd->vm_fault_exc_data) {
+   pdd->vm_fault_exc_data = kmemdup(
+   exception_data,
+   exception_data_size,
+   GFP_KERNEL);
+   if (!pdd->vm_fault_exc_data)
+   pr_debug("Failed to allocate 
exception data memory");
+   } else {
+   pr_debug("Debugger exception data not 
saved\n");
+   print_hex_dump_bytes("exception data: ",
+   DUMP_PREFIX_OFFSET,
+   exception_data,
+   exception_data_size);
+   }
+   }
+   break;
+   }
+   } else if (event_mask & KFD_EC_MASK_PROCESS) {
+   process->exception_status |= event_mask & KFD_EC_MASK_PROCESS;
+   } else {
+   pqm = &process->pqm;
+   list_for_each_entry(pqn, &pqm->queues,
+   process_queue_list) {
+   int target_id;
+
+   if (!pqn->q)
+   continue;
+
+   target_id = event_mask & KFD_EC_MASK(EC_QUEUE_NEW) ?
+   pqn->q->properties.queue_id :
+   pqn->q->doorbell_id;
+
+   if (pqn->q->device != dev || target_id != source_id)
+   continue;
+
+   pqn->q->properties.exception_status |= event_mask;
+   break;
+   }
+   }
+
+   if (process->exception_enable_mask & event_mask) {
+   if (use_worker)
+   schedule_work(&process->debug_event_workarea);


The worker definition should be in the same patch.



+   else
+   kernel_write(process->dbg_ev_file,
+   &write_data,
+   1,
+   &pos);
+   } else {
+   is_subscribed = false;
+   }
+
+   mutex_unlock(&process->event_mutex);
+
+   return is_subscribed;
+}
+
  static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
  {
struct mqd_update_info m

Re: [PATCH 16/32] drm/amdkfd: add per process hw trap enable and disable functions

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

To enable HW debug mode per process, all devices must be debug enabled
successfully.  If a failure occurs, rewind the enablement of debug mode
on the enabled devices.

A power management scenario that needs to be considered is HW
debug mode setting during GFXOFF.  During GFXOFF, these registers
will be unreachable so we have to transiently disable GFXOFF when
setting.  Also, some devices don't support the RLC save restore
function for these debug registers so we have to disable GFXOFF
completely during a debug session.

Cooperative launch also has debugging restriction based on HW/FW bugs.
If such bugs exists, the debugger cannot attach to a process that uses GWS
resources nor can GWS resources be requested if a process is being
debugged.

Multi-process debug devices can only enable trap temporaries based
on certain runtime scenarios, which will be explained when the
runtime enable functions are implemented in a follow up patch.

v2: add gfx11 support. fix fw checks. remove asic family name comments.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |   5 +
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 148 +-
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  29 
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |   1 +
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |   9 ++
  5 files changed, 190 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index f5f639de28f0..628178126d3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1453,6 +1453,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep,
goto out_unlock;
}
  
+	if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) {

+   retval = -EBUSY;
+   goto out_unlock;
+   }
+
retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws 
: NULL);
mutex_unlock(&p->mutex);
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c

index 6e99a0160275..659dfc7411fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -21,6 +21,7 @@
   */
  
  #include "kfd_debug.h"

+#include "kfd_device_queue_manager.h"
  #include 
  
  void debug_event_write_work_handler(struct work_struct *work)

@@ -101,11 +102,68 @@ static int kfd_dbg_set_mes_debug_mode(struct 
kfd_process_device *pdd)
pdd->watch_points, flags);
  }
  
+/* kfd_dbg_trap_deactivate:

+ * target: target process
+ * unwind: If this is unwinding a failed kfd_dbg_trap_enable()
+ * unwind_count:
+ * If unwind == true, how far down the pdd list we need
+ * to unwind
+ * else: ignored
+ */
+static void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, 
int unwind_count)
+{
+   int i, count = 0;
+
+   for (i = 0; i < target->n_pdds; i++) {
+   struct kfd_process_device *pdd = target->pdds[i];
+
+   /* If this is an unwind, and we have unwound the required
+* enable calls on the pdd list, we need to stop now
+* otherwise we may mess up another debugger session.
+*/
+   if (unwind && count == unwind_count)
+   break;
+
+   /* GFX off is already disabled by debug activate if not RLC 
restore supported. */
+   if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+   amdgpu_gfx_off_ctrl(pdd->dev->adev, false);
+   pdd->spi_dbg_override =
+   pdd->dev->kfd2kgd->disable_debug_trap(
+   pdd->dev->adev,
+   target->runtime_info.ttmp_setup,
+   pdd->dev->vm_info.last_vmid_kfd);
+   if (kfd_dbg_is_rlc_restore_supported(pdd->dev))
+   amdgpu_gfx_off_ctrl(pdd->dev->adev, true);


Shouldn't this reenable GFXOFF unconditionally? It should not stay 
disabled on devices without RLC restore support, because we're ending 
the debug session here.




+
+   if (!kfd_dbg_is_per_vmid_supported(pdd->dev) &&
+   release_debug_trap_vmid(pdd->dev->dqm, 
&pdd->qpd))
+   pr_err("Failed to release debug vmid on [%i]\n", 
pdd->dev->id);
+
+   if (!pdd->dev->shared_resources.enable_mes)
+   debug_refresh_runlist(pdd->dev->dqm);
+   else
+   kfd_dbg_set_mes_debug_mode(pdd);
+
+   count++;


Isn't count the same as i? Why do we need a separate variable here?

Regards,
  Felix



+   }
+
+   kfd_dbg_set_workaround(target, false);
+}
+
  int kfd_dbg_trap_disable(struct kfd_process *t

Re: [PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

Due to a HW bug, waves in only half the shader arrays can enter trap.

When starting a debug session, relocate all waves to the first shader
array of each shader engine and mask off the 2nd shader array as
unavailable.

When ending a debug session, re-enable the 2nd shader array per
shader engine.

User CU masking per queue cannot be guaranteed to remain functional
if requested during debugging (e.g. user cu mask requests only 2nd shader
array as an available resource leading to zero HW resources available)
nor can runtime be alerted of any of these changes during execution.

Make user CU masking and debugging mutual exclusive with respect to
availability.

If the debugger tries to attach to a process with a user cu masked
queue, return the runtime status as enabled but busy.

If the debugger tries to attach and fails to reallocate queue waves to
the first shader array of each shader engine, return the runtime status
as enabled but with an error.

In addition, like any other multi-process debug supported devices,
disable trap temporary setup per-process to avoid performance impact from
setup overhead.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   |  2 +
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c|  7 +-
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 -
  drivers/gpu/drm/amd/amdkfd/kfd_debug.c| 64 +++
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  3 +-
  .../drm/amd/amdkfd/kfd_device_queue_manager.c |  7 ++
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  3 +-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  3 +-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 42 
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  3 +-
  .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  3 +-
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 +-
  .../amd/amdkfd/kfd_process_queue_manager.c|  9 ++-
  13 files changed, 124 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index d20df0cf0d88..b5f5eed2b5ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -219,6 +219,8 @@ struct mes_add_queue_input {
uint32_tgws_size;
uint64_ttba_addr;
uint64_ttma_addr;
+   uint32_ttrap_en;
+   uint32_tskip_process_ctx_clear;
uint32_tis_kfd_process;
uint32_tis_aql_queue;
uint32_tqueue_size;
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index fbacdc42efac..38c7a0cbf264 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -197,17 +197,14 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
+   mes_add_queue_pkt.trap_en = input->trap_en;
+   mes_add_queue_pkt.skip_process_ctx_clear = 
input->skip_process_ctx_clear;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
  
  	/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */

mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size;
  
-	if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 4) &&

- (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0)) &&
- (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3
-   mes_add_queue_pkt.trap_en = 1;
-
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL 
queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
mes_add_queue_pkt.gds_size = input->queue_size;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index ee05c2e54ef6..f5f639de28f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -530,8 +530,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct 
kfd_process *p,
goto out;
}
  
-	minfo.update_flag = UPDATE_FLAG_CU_MASK;

-
mutex_lock(&p->mutex);
  
  	retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index f6ea6db266b4..6e99a0160275 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -37,6 +37,70 @@ void debug_event_write_work_handler(struct work_struct *work)
kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
  }
  
+static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)

+{
+   struct mqd_update_info minfo = {0};
+   int err;
+
+   if (!q || (!q->pro

RE: [PATCH v2] drm/amdgpu/nv: Apply ASPM quirk on Intel ADL + AMD Navi

2023-03-20 Thread Limonciello, Mario
[Public]



> -Original Message-
> From: Kai-Heng Feng 
> Sent: Wednesday, March 15, 2023 07:07
> To: Deucher, Alexander ; Koenig, Christian
> ; Pan, Xinhui 
> Cc: Kai-Heng Feng ; David Airlie
> ; Daniel Vetter ; Zhang, Hawking
> ; Gao, Likun ; Kuehling,
> Felix ; Zhao, Victor ;
> Xiao, Jack ; Quan, Evan ;
> Limonciello, Mario ; Lazar, Lijo
> ; Chai, Thomas ; Andrey
> Grodzovsky ; Somalapuram, Amaranath
> ; Zhang, Bokun
> ; Liu, Leo ; Gopalakrishnan,
> Veerabadhran (Veera) ; Gong,
> Richard ; Feng, Kenneth
> ; Jiansong Chen ;
> amd-gfx@lists.freedesktop.org; dri-de...@lists.freedesktop.org; linux-
> ker...@vger.kernel.org
> Subject: [PATCH v2] drm/amdgpu/nv: Apply ASPM quirk on Intel ADL + AMD
> Navi
> 
> S2idle resume freeze can be observed on Intel ADL + AMD WX5500. This is
> caused by commit 0064b0ce85bb ("drm/amd/pm: enable ASPM by default").
> 
> The root cause is still not clear for now.
> 
> So extend and apply the ASPM quirk from commit e02fe3bc7aba
> ("drm/amdgpu: vi: disable ASPM on Intel Alder Lake based systems"), to
> workaround the issue on Navi cards too.
> 
> Fixes: 0064b0ce85bb ("drm/amd/pm: enable ASPM by default")
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2458
> Reviewed-by: Alex Deucher 
> Signed-off-by: Kai-Heng Feng 

Reviewed-by: Mario Limonciello 

I've applied to this to amd-staging-drm-next, thanks!

> ---
> v2:
>  - Rename the quirk function.
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h|  1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 15 +++
>  drivers/gpu/drm/amd/amdgpu/nv.c|  2 +-
>  drivers/gpu/drm/amd/amdgpu/vi.c| 17 +
>  4 files changed, 18 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 164141bc8b4a..5f3b139c1f99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1272,6 +1272,7 @@ void amdgpu_device_pci_config_reset(struct
> amdgpu_device *adev);
>  int amdgpu_device_pci_reset(struct amdgpu_device *adev);
>  bool amdgpu_device_need_post(struct amdgpu_device *adev);
>  bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
> +bool amdgpu_device_aspm_support_quirk(void);
> 
>  void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64
> num_bytes,
> u64 num_vis_bytes);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index c4a4e2fe6681..05a34ff79e78 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -80,6 +80,10 @@
> 
>  #include 
> 
> +#if IS_ENABLED(CONFIG_X86)
> +#include 
> +#endif
> +
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -1356,6 +1360,17 @@ bool amdgpu_device_should_use_aspm(struct
> amdgpu_device *adev)
>   return pcie_aspm_enabled(adev->pdev);
>  }
> 
> +bool amdgpu_device_aspm_support_quirk(void)
> +{
> +#if IS_ENABLED(CONFIG_X86)
> + struct cpuinfo_x86 *c = &cpu_data(0);
> +
> + return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> +#else
> + return true;
> +#endif
> +}
> +
>  /* if we get transitioned to only one device, take VGA back */
>  /**
>   * amdgpu_device_vga_set_decode - enable/disable vga decode
> diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c
> b/drivers/gpu/drm/amd/amdgpu/nv.c
> index 855d390c41de..26733263913e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nv.c
> @@ -578,7 +578,7 @@ static void nv_pcie_gen3_enable(struct
> amdgpu_device *adev)
> 
>  static void nv_program_aspm(struct amdgpu_device *adev)
>  {
> - if (!amdgpu_device_should_use_aspm(adev))
> + if (!amdgpu_device_should_use_aspm(adev) ||
> !amdgpu_device_aspm_support_quirk())
>   return;
> 
>   if (!(adev->flags & AMD_IS_APU) &&
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c
> b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 12ef782eb478..ceab8783575c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -81,10 +81,6 @@
>  #include "mxgpu_vi.h"
>  #include "amdgpu_dm.h"
> 
> -#if IS_ENABLED(CONFIG_X86)
> -#include 
> -#endif
> -
>  #define ixPCIE_LC_L1_PM_SUBSTATE 0x100100C6
>  #define
> PCIE_LC_L1_PM_SUBSTATE__LC_L1_SUBSTATES_OVERRIDE_EN_MASK
>   0x0001L
>  #define PCIE_LC_L1_PM_SUBSTATE__LC_PCI_PM_L1_2_OVERRIDE_MASK
>   0x0002L
> @@ -1138,24 +1134,13 @@ static void vi_enable_aspm(struct
> amdgpu_device *adev)
>   WREG32_PCIE(ixPCIE_LC_CNTL, data);
>  }
> 
> -static bool aspm_support_quirk_check(void)
> -{
> -#if IS_ENABLED(CONFIG_X86)
> - struct cpuinfo_x86 *c = &cpu_data(0);
> -
> - return !(c->x86 == 6 && c->x86_model == INTEL_FAM6_ALDERLAKE);
> -#else
> - return true;
> -#endif
> -}
> -
>  static void vi_program_aspm(struct amdgpu_d

Re: [PATCH 14/32] drm/amdgpu: expose debug api for mes

2023-03-20 Thread Felix Kuehling

On 2023-01-25 14:53, Jonathan Kim wrote:

Similar to the F32 HWS, the RS64 HWS for GFX11 now supports a multi-process
debug API.

The skip_process_ctx_clear ADD_QUEUE requirement is to prevent the MES
from clearing the process context when the first queue is added to the
scheduler in order to maintain debug mode settings during queue preemption
and restore.  The MES clears the process context in this case due to an
unresolved FW caching bug during normal mode operations.
During debug mode, the KFD will hold a reference to the target process
so the process context should never go stale and MES can afford to skip
this requirement.

Signed-off-by: Jonathan Kim 


Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   | 32 +++
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h   | 20 
  drivers/gpu/drm/amd/amdgpu/mes_v11_0.c| 12 +++
  drivers/gpu/drm/amd/include/mes_v11_api_def.h | 21 +++-
  4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 82e27bd4f038..4916e0b0156f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -924,6 +924,38 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, 
uint32_t reg,
return r;
  }
  
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,

+   uint64_t process_context_addr,
+   uint32_t spi_gdbg_per_vmid_cntl,
+   const uint32_t *tcp_watch_cntl,
+   uint32_t flags)
+{
+   struct mes_misc_op_input op_input = {0};
+   int r;
+
+   if (!adev->mes.funcs->misc_op) {
+   DRM_ERROR("mes set shader debugger is not supported!\n");
+   return -EINVAL;
+   }
+
+   op_input.op = MES_MISC_OP_SET_SHADER_DEBUGGER;
+   op_input.set_shader_debugger.process_context_addr = 
process_context_addr;
+   op_input.set_shader_debugger.flags.u32all = flags;
+   op_input.set_shader_debugger.spi_gdbg_per_vmid_cntl = 
spi_gdbg_per_vmid_cntl;
+   memcpy(op_input.set_shader_debugger.tcp_watch_cntl, tcp_watch_cntl,
+   sizeof(op_input.set_shader_debugger.tcp_watch_cntl));
+
+   amdgpu_mes_lock(&adev->mes);
+
+   r = adev->mes.funcs->misc_op(&adev->mes, &op_input);
+   if (r)
+   DRM_ERROR("failed to set_shader_debugger\n");
+
+   amdgpu_mes_unlock(&adev->mes);
+
+   return r;
+}
+
  static void
  amdgpu_mes_ring_to_queue_props(struct amdgpu_device *adev,
   struct amdgpu_ring *ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 547ec35691fa..d20df0cf0d88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -256,6 +256,7 @@ enum mes_misc_opcode {
MES_MISC_OP_READ_REG,
MES_MISC_OP_WRM_REG_WAIT,
MES_MISC_OP_WRM_REG_WR_WAIT,
+   MES_MISC_OP_SET_SHADER_DEBUGGER,
  };
  
  struct mes_misc_op_input {

@@ -278,6 +279,20 @@ struct mes_misc_op_input {
uint32_t   reg0;
uint32_t   reg1;
} wrm_reg;
+
+   struct {
+   uint64_t process_context_addr;
+   union {
+   struct {
+   uint64_t single_memop : 1;
+   uint64_t single_alu_op : 1;
+   uint64_t reserved: 30;
+   };
+   uint32_t u32all;
+   } flags;
+   uint32_t spi_gdbg_per_vmid_cntl;
+   uint32_t tcp_watch_cntl[4];
+   } set_shader_debugger;
};
  };
  
@@ -340,6 +355,11 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,

  int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
  uint32_t reg0, uint32_t reg1,
  uint32_t ref, uint32_t mask);
+int amdgpu_mes_set_shader_debugger(struct amdgpu_device *adev,
+   uint64_t process_context_addr,
+   uint32_t spi_gdbg_per_vmid_cntl,
+   const uint32_t *tcp_watch_cntl,
+   uint32_t flags);
  
  int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,

int queue_type, int idx,
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 62cdd2113135..fbacdc42efac 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -334,6 +334,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
  

Re: [PATCH] drm/amd/display: use a more accurate check in dm_helpers_dp_read_dpcd()

2023-03-20 Thread Harry Wentland



On 3/10/23 12:51, Ville Syrjälä wrote:
> On Fri, Mar 10, 2023 at 07:48:04PM +0200, Ville Syrjälä wrote:
>> On Thu, Mar 09, 2023 at 04:30:27PM -0500, Hamza Mahfooz wrote:
>>> We should be checking if drm_dp_dpcd_read() returns the size that we are
>>> asking it to read instead of just checking if it is greater than zero.
>>> Also, we should WARN_ON() here since this condition is only ever met, if
>>> there is an issue worth investigating. So, compare the return value of
>>> drm_dp_dpcd_read() to size and WARN_ON() if they aren't equal.
>>>
>>> Signed-off-by: Hamza Mahfooz 
>>> ---
>>>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 4 ++--
>>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
>>> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
>>> index 8d598b322e5b..ed2ed7b1d869 100644
>>> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
>>> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
>>> @@ -511,8 +511,8 @@ bool dm_helpers_dp_read_dpcd(
>>> return false;
>>> }
>>>  
>>> -   return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address,
>>> -   data, size) > 0;
>>> +   return !WARN_ON(drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address,
>>> +data, size) != size);
>>
>> Just FYI there are devices out there that violate the DP spec and reads
>> from specific DPCD registers simply fail instead of returning the
>> expected 0.
> 
> And of course anyone can yank the cable anytime, so in
> fact pretty much any DPCD read can fail.
> 

Thanks for making this very important point. It seems like drm_dp_dpcd_access
checks for that, though, and returns -EPROTO if !(ret == size). So I don't
expect this patch to change any behavior.

Harry





Re: [PATCH v2] drm/amd/display: use a more accurate check in dm_helpers_dp_read_dpcd()

2023-03-20 Thread Harry Wentland
On 3/10/23 13:42, Hamza Mahfooz wrote:
> We should be checking if drm_dp_dpcd_read() returns the size that we are
> asking it to read instead of just checking if it is greater than zero.
> So, compare the return value of drm_dp_dpcd_read() to the requested
> read size.
> 
> Signed-off-by: Hamza Mahfooz 

I don't think we can ever hit a case where the return value is greater
than 0 but not equal to size as drm_dp_dpcd_access already checks for it.

Either way, the stricter check makes sense and is
Reviewed-by: Harry Wentland 

Harry

> ---
> v2: drop the WARN_ON().
> ---
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> index 8d598b322e5b..9c1e91c2179e 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
> @@ -511,8 +511,8 @@ bool dm_helpers_dp_read_dpcd(
>   return false;
>   }
>  
> - return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address,
> - data, size) > 0;
> + return drm_dp_dpcd_read(&aconnector->dm_dp_aux.aux, address, data,
> + size) == size;
>  }
>  
>  bool dm_helpers_dp_write_dpcd(



Re: [PATCH 13/32] drm/amdgpu: prepare map process for multi-process debug devices

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

Unlike single process debug devices, multi-process debug devices allow
debug mode setting per-VMID (non-device-global).

Because the HWS manages PASID-VMID mapping, the new MAP_PROCESS API allows
the KFD to forward the required SPI debug register write requests.

To request a new debug mode setting change, the KFD must be able to
preempt all queues then remap all queues with these new setting
requests for MAP_PROCESS to take effect.

Note that by default, trap enablement in non-debug mode must be disabled
for performance reasons for multi-process debug devices due to setup
overhead in FW.

v2: remove asic family code name comment in per vmid support check

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdkfd/kfd_debug.h|  7 +++
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 50 +++
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  3 ++
  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 15 ++
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  9 
  drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  5 ++
  6 files changed, 89 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index 8aa7a3ad4e97..53c5a3e55bd2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -32,5 +32,12 @@ int kfd_dbg_trap_disable(struct kfd_process *target);
  int kfd_dbg_trap_enable(struct kfd_process *target, uint32_t fd,
void __user *runtime_info,
uint32_t *runtime_info_size);
+
+static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_dev *dev)
+{
+   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2);
+}
+
  void debug_event_write_work_handler(struct work_struct *work);
+
  #endif
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 0cd3a5e9ff25..2517716d7cbc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -2554,6 +2554,56 @@ int release_debug_trap_vmid(struct device_queue_manager 
*dqm,
return r;
  }
  
+int debug_lock_and_unmap(struct device_queue_manager *dqm)

+{
+   int r;
+
+   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+   pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+   return -EINVAL;
+   }
+
+   if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+   return 0;
+
+   dqm_lock(dqm);
+
+   r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, 
false);
+   if (r)
+   dqm_unlock(dqm);
+
+   return r;
+}
+
+int debug_map_and_unlock(struct device_queue_manager *dqm)
+{
+   int r;
+
+   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+   pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+   return -EINVAL;
+   }
+
+   if (!kfd_dbg_is_per_vmid_supported(dqm->dev))
+   return 0;
+
+   r = map_queues_cpsch(dqm);
+
+   dqm_unlock(dqm);
+
+   return r;
+}
+
+int debug_refresh_runlist(struct device_queue_manager *dqm)
+{
+   int r = debug_lock_and_unmap(dqm);
+
+   if (r)
+   return r;
+
+   return debug_map_and_unlock(dqm);
+}
+
  #if defined(CONFIG_DEBUG_FS)
  
  static void seq_reg_dump(struct seq_file *m,

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 0cb1504d24cf..bef3be84c5cc 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -286,6 +286,9 @@ int reserve_debug_trap_vmid(struct device_queue_manager 
*dqm,
struct qcm_process_device *qpd);
  int release_debug_trap_vmid(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
+int debug_lock_and_unmap(struct device_queue_manager *dqm);
+int debug_map_and_unlock(struct device_queue_manager *dqm);
+int debug_refresh_runlist(struct device_queue_manager *dqm);
  
  static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)

  {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 363cf8e005cc..f19c506da23d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -88,6 +88,10 @@ static int pm_map_process_aldebaran(struct packet_manager 
*pm,
  {
struct pm4_mes_map_process_aldebaran *packet;
uint64_t vm_page_table_base_addr = qpd->page_table_base;
+   struct kfd_dev *kfd = pm->dqm->dev;
+   struct kfd_process_device *pdd =
+   container_of(qpd, struct kfd_process_device, qpd);
+   int i;
  
  	packet = (struct pm4_mes_map_process_aldebaran *)buffer;

Re: [PATCH 12/32] drm/amdkfd: prepare map process for single process debug devices

2023-03-20 Thread Felix Kuehling



On 2023-01-25 14:53, Jonathan Kim wrote:

Older HW only supports debugging on a single process because the
SPI debug mode setting registers are device global.

The HWS has supplied a single pinned VMID (0xf) for MAP_PROCESS
for debug purposes. To pin the VMID, the KFD will remove the VMID from
the HWS dynamic VMID allocation via SET_RESOURCES so that a debugged
process will never migrate away from its pinned VMID.

The KFD is responsible for reserving and releasing this pinned VMID
accordingly whenever the debugger attaches and detaches respectively.

Signed-off-by: Jonathan Kim 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 101 +-
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |   5 +
  .../drm/amd/amdkfd/kfd_packet_manager_v9.c|   9 ++
  .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   |   5 +-
  4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7556f80d41e4..0cd3a5e9ff25 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1490,7 +1490,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
dqm->active_cp_queue_count = 0;
dqm->gws_queue_count = 0;
dqm->active_runlist = false;
-   INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
+   dqm->trap_debug_vmid = 0;


Are you removing the INIT_WORK on purpose here? Looks like a mistake 
that would break GPU recovery.



  
  	init_sdma_bitmaps(dqm);
  
@@ -1933,8 +1933,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,

if (!dqm->dev->shared_resources.enable_mes) {
decrement_queue_count(dqm, qpd, q);
retval = execute_queues_cpsch(dqm,
- 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
- USE_DEFAULT_GRACE_PERIOD);
+ 
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);


Unnecessary formatting change.



if (retval == -ETIME)
qpd->reset_wavefronts = true;
} else {
@@ -2463,6 +2462,98 @@ static void kfd_process_hw_exception(struct work_struct 
*work)
amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
  }
  
+int reserve_debug_trap_vmid(struct device_queue_manager *dqm,

+   struct qcm_process_device *qpd)
+{
+   int r;
+   int updated_vmid_mask;
+
+   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+   pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+   return -EINVAL;
+   }
+
+   dqm_lock(dqm);
+
+   if (dqm->trap_debug_vmid != 0) {
+   pr_err("Trap debug id already reserved\n");
+   r = -EBUSY;
+   goto out_unlock;
+   }
+
+   r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+   USE_DEFAULT_GRACE_PERIOD, false);
+   if (r)
+   goto out_unlock;
+
+   updated_vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
+   updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd);
+
+   dqm->dev->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+   dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd;
+   r = set_sched_resources(dqm);
+   if (r)
+   goto out_unlock;
+
+   r = map_queues_cpsch(dqm);
+   if (r)
+   goto out_unlock;
+
+   pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid);
+
+out_unlock:
+   dqm_unlock(dqm);
+   return r;
+}
+
+/*
+ * Releases vmid for the trap debugger
+ */
+int release_debug_trap_vmid(struct device_queue_manager *dqm,
+   struct qcm_process_device *qpd)
+{
+   int r;
+   int updated_vmid_mask;
+   uint32_t trap_debug_vmid;
+
+   if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+   pr_err("Unsupported on sched_policy: %i\n", dqm->sched_policy);
+   return -EINVAL;
+   }
+
+   dqm_lock(dqm);
+   trap_debug_vmid = dqm->trap_debug_vmid;
+   if (dqm->trap_debug_vmid == 0) {
+   pr_err("Trap debug id is not reserved\n");
+   r = -EINVAL;
+   goto out_unlock;
+   }
+
+   r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+   USE_DEFAULT_GRACE_PERIOD, false);
+   if (r)
+   goto out_unlock;
+
+   updated_vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;
+   updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd);
+
+   dqm->dev->shared_resources.compute_vmid_bitmap = updated_vmid_mask;
+   dqm->trap_debug_vmid = 0;
+   r = set_sched_resources(

Re: [PATCH 11/32] drm/amdgpu: add configurable grace period for unmap queues

2023-03-20 Thread Felix Kuehling

On 2023-01-25 14:53, Jonathan Kim wrote:

The HWS schedule allows a grace period for wave completion prior to
preemption for better performance by avoiding CWSR on waves that can
potentially complete quickly. The debugger, on the other hand, will
want to inspect wave status immediately after it actively triggers
preemption (a suspend function to be provided).

To minimize latency between preemption and debugger wave inspection, allow
immediate preemption by setting the grace period to 0.

Note that setting the preemption grace period to 0 will result in an
infinite grace period being set due to a CP FW bug so set it to 1 for now.

v2: clarify purpose in the description of this patch

Signed-off-by: Jonathan Kim 


Reviewed-by: Felix Kuehling 



---
  .../drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c  |  2 +
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  2 +
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 43 
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h|  6 ++
  .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c  |  2 +
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 43 
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  9 ++-
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 61 -
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +
  .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 32 +
  .../drm/amd/amdkfd/kfd_packet_manager_v9.c| 39 +++
  .../gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h   | 65 +++
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  5 ++
  13 files changed, 291 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index 89868f9927ae..a64a53f9efe6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -81,5 +81,7 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+   .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index d5bb86ccd617..ef8befc31fc6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -410,6 +410,8 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
kgd_gfx_v9_set_vm_context_page_table_base,
.enable_debug_trap = kgd_arcturus_enable_debug_trap,
.disable_debug_trap = kgd_arcturus_disable_debug_trap,
+   .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+   .build_grace_period_packet_info = 
kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = 
kgd_gfx_v9_program_trap_handler_settings,
  };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index c09b45de02d0..2491402afd58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -801,6 +801,47 @@ uint32_t kgd_gfx_v10_disable_debug_trap(struct 
amdgpu_device *adev,
return 0;
  }
  
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values

+ * The values read are:
+ * ib_offload_wait_time -- Wait Count for Indirect Buffer Offloads.
+ * atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ * wrm_offload_wait_time-- Wait Count for WAIT_REG_MEM Offloads.
+ * gws_wait_time-- Wait Count for Global Wave Syncs.
+ * que_sleep_wait_time  -- Wait Count for Dequeue Retry.
+ * sch_wave_wait_time   -- Wait Count for Scheduling Wave Message.
+ * sem_rearm_wait_time  -- Wait Count for Semaphore re-arm.
+ * deq_retry_wait_time  -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+   uint32_t *wait_times)
+
+{
+   *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+   uint32_t wait_times,
+   uint32_t grace_period,
+   uint32_t *reg_offset,
+   uint32_t *reg_data)
+{
+   *reg_data = wait_times;
+
+   /*
+* The CP cannont handle a 0 grace period input and will result i

[PATCH 02/11] drm/amdgpu/gfx11: check the CP FW version CP GFX shadow support

2023-03-20 Thread Alex Deucher
Only set the supported flag if we have new enough CP FW.

XXX: don't commit this until the CP FW versions are finalized!

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index a0d830dc0d01..4a50d0fbcdcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -469,6 +469,15 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct 
amdgpu_device *adev)
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
case IP_VERSION(11, 0, 3):
+   /* XXX fix me! */
+   if ((adev->gfx.me_fw_version >= 1498) &&
+   (adev->gfx.me_feature_version >= 29) &&
+   (adev->gfx.pfp_fw_version >= 1541) &&
+   (adev->gfx.pfp_feature_version >= 29) &&
+   (adev->gfx.mec_fw_version >= 507) &&
+   (adev->gfx.mec_feature_version >= 29))
+   adev->gfx.cp_gfx_shadow = true;
+   break;
default:
adev->gfx.cp_gfx_shadow = false;
break;
-- 
2.39.2



[PATCH 07/11] drm/amdgpu: add UAPI to query GFX shadow sizes

2023-03-20 Thread Alex Deucher
Add UAPI to query the GFX shadow buffer requirements
for preemption on GFX11.  UMDs need to specify the shadow
areas for preemption.

Signed-off-by: Alex Deucher 
---
 include/uapi/drm/amdgpu_drm.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 3d9474af6566..19a806145371 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -886,6 +886,7 @@ struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
#define AMDGPU_INFO_VIDEO_CAPS_DECODE   0
/* Subquery id: Encode */
#define AMDGPU_INFO_VIDEO_CAPS_ENCODE   1
+#define AMDGPU_INFO_CP_GFX_SHADOW_SIZE 0x22
 
 #define AMDGPU_INFO_MMR_SE_INDEX_SHIFT 0
 #define AMDGPU_INFO_MMR_SE_INDEX_MASK  0xff
@@ -1203,6 +1204,15 @@ struct drm_amdgpu_info_video_caps {
struct drm_amdgpu_info_video_codec_info 
codec_info[AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_COUNT];
 };
 
+struct drm_amdgpu_info_cp_gfx_shadow_size {
+   __u32 shadow_size;
+   __u32 shadow_alignment;
+   __u32 csa_size;
+   __u32 csa_alignment;
+   __u32 gds_size;
+   __u32 gds_alignment;
+};
+
 /*
  * Supported GPU families
  */
-- 
2.39.2



[PATCH 06/11] drm/amdgpu: don't require a job for cond_exec and shadow

2023-03-20 Thread Alex Deucher
We need to reset the shadow state every time we submit an
IB and there needs to be a COND_EXEC packet after the
SET_Q_PREEMPTION_MODE packet for it to work properly, so
we should emit both of these packets regardless of whether
there is a job present or not.

Reviewed-by: Christian König 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 7fb8e6691d13..3b35e21eb934 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -223,11 +223,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
 
amdgpu_ring_ib_begin(ring);
 
-   if (job && ring->funcs->emit_gfx_shadow)
+   if (ring->funcs->emit_gfx_shadow)
amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
init_shadow, vmid);
 
-   if (job && ring->funcs->init_cond_exec)
+   if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
amdgpu_device_flush_hdp(adev, ring);
-- 
2.39.2



[PATCH 08/11] drm/amdgpu: add gfx shadow callback

2023-03-20 Thread Alex Deucher
To provide IP specific shadow sizes.  UMDs will use
this to query the kernel driver for the size of the
shadow buffers.

v2: make callback return an int (Alex)

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 4ad9e225d6e6..8826f1efc75f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -219,6 +219,15 @@ struct amdgpu_gfx_ras {
struct amdgpu_iv_entry *entry);
 };
 
+struct amdgpu_gfx_shadow_info {
+   u32 shadow_size;
+   u32 shadow_alignment;
+   u32 csa_size;
+   u32 csa_alignment;
+   u32 gds_size;
+   u32 gds_alignment;
+};
+
 struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
@@ -236,6 +245,8 @@ struct amdgpu_gfx_funcs {
 u32 queue, u32 vmid);
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+   int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+  struct amdgpu_gfx_shadow_info *shadow_info);
 };
 
 struct sq_work {
@@ -372,6 +383,7 @@ struct amdgpu_gfx {
 #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) 
(adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) 
(adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
 #define amdgpu_gfx_init_spm_golden(adev) 
(adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) 
(adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))
 
 /**
  * amdgpu_gfx_create_bitmask - create a bitmask
-- 
2.39.2



[PATCH 11/11] drm/amdgpu: bump driver version number for CP GFX shadow

2023-03-20 Thread Alex Deucher
So UMDs can determine whether the kernel supports this.

Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 64214996278b..81e409501f58 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -110,9 +110,10 @@
  *   3.52.0 - Add AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD, add device_info 
fields:
  *tcp_cache_size, num_sqc_per_wgp, sqc_data_cache_size, 
sqc_inst_cache_size,
  *gl1c_cache_size, gl2c_cache_size, mall_size, 
enabled_rb_pipes_mask_hi
+ *   3.53.0 - Support for GFX11 CP GFX shadowing
  */
 #define KMS_DRIVER_MAJOR   3
-#define KMS_DRIVER_MINOR   52
+#define KMS_DRIVER_MINOR   53
 #define KMS_DRIVER_PATCHLEVEL  0
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
-- 
2.39.2



[PATCH 10/11] drm/amdgpu: add support for new GFX shadow size query

2023-03-20 Thread Alex Deucher
Use the new callback to fetch the data.  Return an error if
not supported.  UMDs should use this query to check whether
shadow buffers are supported and if so what size they
should be.

v2: return an error rather than a zerod structure.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 26 +
 1 file changed, 26 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 9e85eedb57d8..8a6764756dcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1139,6 +1139,32 @@ int amdgpu_info_ioctl(struct drm_device *dev, void 
*data, struct drm_file *filp)
kfree(caps);
return r;
}
+   case AMDGPU_INFO_CP_GFX_SHADOW_SIZE: {
+   struct amdgpu_gfx_shadow_info shadow_info;
+   struct drm_amdgpu_info_cp_gfx_shadow_size drm_shadow_size;
+   int r;
+
+   memset(&shadow_info, 0, sizeof(struct amdgpu_gfx_shadow_info));
+   if (adev->gfx.funcs->get_gfx_shadow_info) {
+   r = amdgpu_gfx_get_gfx_shadow_info(adev, &shadow_info);
+   if (r)
+   return r;
+   drm_shadow_size.shadow_size = shadow_info.shadow_size;
+   drm_shadow_size.shadow_alignment = 
shadow_info.shadow_alignment;
+   drm_shadow_size.csa_size = shadow_info.csa_size;
+   drm_shadow_size.csa_alignment = 
shadow_info.csa_alignment;
+   drm_shadow_size.gds_size = shadow_info.gds_size;
+   drm_shadow_size.gds_alignment = 
shadow_info.gds_alignment;
+   } else {
+   return -ENOTSUPP;
+   }
+   r = copy_to_user(out, &drm_shadow_size,
+min((size_t)size,
+sizeof(struct 
drm_amdgpu_info_cp_gfx_shadow_size))) ?
+   -EFAULT : 0;
+   return r;
+
+   }
default:
DRM_DEBUG_KMS("Invalid request %d\n", info->query);
return -EINVAL;
-- 
2.39.2



[PATCH 09/11] drm/amdgpu: add get_gfx_shadow_info callback for gfx11

2023-03-20 Thread Alex Deucher
Used to get the size and alignment requirements for
the gfx shadow buffer for preemption.

v2: use FW version check to determine whether to
return a valid size here
return an error if not supported (Alex)

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 27 ++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 1fc1e941f7df..d6aeaf530fe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -822,6 +822,32 @@ static void gfx_v11_0_select_me_pipe_q(struct 
amdgpu_device *adev,
soc21_grbm_select(adev, me, pipe, q, vm);
 }
 
+/* all sizes are in bytes */
+#define MQD_SHADOW_BASE_SIZE  73728
+#define MQD_SHADOW_BASE_ALIGNMENT 256
+#define MQD_FWWORKAREA_SIZE   484
+#define MQD_FWWORKAREA_ALIGNMENT  256
+
+static int gfx_v11_0_get_gfx_shadow_info(struct amdgpu_device *adev,
+struct amdgpu_gfx_shadow_info 
*shadow_info)
+{
+   if (shadow_info) {
+   if (adev->gfx.cp_gfx_shadow) {
+   shadow_info->shadow_size = MQD_SHADOW_BASE_SIZE;
+   shadow_info->shadow_alignment = 
MQD_SHADOW_BASE_ALIGNMENT;
+   shadow_info->csa_size = MQD_FWWORKAREA_SIZE;
+   shadow_info->csa_alignment = MQD_FWWORKAREA_ALIGNMENT;
+   shadow_info->gds_size = 0x1000;
+   shadow_info->gds_alignment = 256;
+   return 0;
+   } else {
+   memset(shadow_info, 0, sizeof(struct 
amdgpu_gfx_shadow_info));
+   return -ENOTSUPP;
+   }
+   }
+   return -EINVAL;
+}
+
 static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.get_gpu_clock_counter = &gfx_v11_0_get_gpu_clock_counter,
.select_se_sh = &gfx_v11_0_select_se_sh,
@@ -830,6 +856,7 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
.read_wave_vgprs = &gfx_v11_0_read_wave_vgprs,
.select_me_pipe_q = &gfx_v11_0_select_me_pipe_q,
.update_perfmon_mgcg = &gfx_v11_0_update_perf_clk,
+   .get_gfx_shadow_info = &gfx_v11_0_get_gfx_shadow_info,
 };
 
 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
-- 
2.39.2



[PATCH 05/11] drm/amdgpu: add gfx11 emit shadow callback

2023-03-20 Thread Alex Deucher
From: Christian König 

Add ring callback for gfx to update the CP firmware
with the new shadow information before we process the
IB.

v2: add implementation for new packet (Alex)
v3: add current FW version checks (Alex)
v4: only initialize shadow on first use
Only set IB_VMID when a valid shadow buffer is present
(Alex)
v5: Pass parameters rather than job to new ring callback (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 25 +
 drivers/gpu/drm/amd/amdgpu/nvd.h   |  5 -
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 4a50d0fbcdcf..1fc1e941f7df 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -5598,6 +5598,29 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct 
amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
 }
 
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
+  u64 shadow_va, u64 csa_va,
+  u64 gds_va, bool init_shadow,
+  int vmid)
+{
+   struct amdgpu_device *adev = ring->adev;
+
+   if (!adev->gfx.cp_gfx_shadow)
+   return;
+
+   amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+   amdgpu_ring_write(ring, lower_32_bits(shadow_va));
+   amdgpu_ring_write(ring, upper_32_bits(shadow_va));
+   amdgpu_ring_write(ring, lower_32_bits(gds_va));
+   amdgpu_ring_write(ring, upper_32_bits(gds_va));
+   amdgpu_ring_write(ring, lower_32_bits(csa_va));
+   amdgpu_ring_write(ring, upper_32_bits(csa_va));
+   amdgpu_ring_write(ring, shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid): 0);
+   amdgpu_ring_write(ring, init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+}
+
 static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
 {
unsigned ret;
@@ -6219,6 +6242,7 @@ static const struct amdgpu_ring_funcs 
gfx_v11_0_ring_funcs_gfx = {
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
+   9 + /* SET_Q_PREEMPTION_MODE */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6245,6 +6269,7 @@ static const struct amdgpu_ring_funcs 
gfx_v11_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+   .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0_ring_preempt_ib,
diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h
index fd6b58243b03..631dafb92299 100644
--- a/drivers/gpu/drm/amd/amdgpu/nvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/nvd.h
@@ -462,6 +462,9 @@
 #  define PACKET3_QUERY_STATUS_ENG_SEL(x)  ((x) << 25)
 #definePACKET3_RUN_LIST0xA5
 #definePACKET3_MAP_PROCESS_VM  0xA6
-
+/* GFX11 */
+#definePACKET3_SET_Q_PREEMPTION_MODE   0xF0
+#  define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x)  ((x) << 0)
+#  define PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM(1 << 0)
 
 #endif
-- 
2.39.2



[PATCH 04/11] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
amdgpu_cs_pass1()
Only initialize shadow on first use
(Alex)
v4: Pass parameters rather than job to new ring callback (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 26 
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 17 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  3 +++
 4 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..50cc117bc628 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
 
default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
return 0;
 }
 
+static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+  struct amdgpu_cs_chunk *chunk)
+{
+   struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+   int i;
+
+   if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW)
+   return -EINVAL;
+
+   for (i = 0; i < p->gang_size; ++i) {
+   p->jobs[i]->shadow_va = shadow->shadow_va;
+   p->jobs[i]->csa_va = shadow->csa_va;
+   p->jobs[i]->gds_va = shadow->gds_va;
+   p->jobs[i]->init_shadow =
+   shadow->flags & 
AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW;
+   }
+
+   return 0;
+}
+
 static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
 {
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+   r = amdgpu_cs_p2_shadow(p, chunk);
+   if (r)
+   return r;
+   break;
}
}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348dbe2..7fb8e6691d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
uint64_t fence_ctx;
uint32_t status = 0, alloc_size;
unsigned fence_flags = 0;
-   bool secure;
+   bool secure, init_shadow;
+   u64 shadow_va, csa_va, gds_va;
+   int vmid = AMDGPU_JOB_GET_VMID(job);
 
unsigned i;
int r = 0;
@@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
vm = job->vm;
fence_ctx = job->base.s_fence ?
job->base.s_fence->scheduled.context : 0;
+   shadow_va = job->shadow_va;
+   csa_va = job->csa_va;
+   gds_va = job->gds_va;
+   init_shadow = job->init_shadow;
} else {
vm = NULL;
fence_ctx = 0;
+   shadow_va = 0;
+   csa_va = 0;
+   gds_va = 0;
+   init_shadow = false;
}
 
if (!ring->sched.ready && !ring->is_mes_queue) {
@@ -212,6 +222,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
 
amdgpu_ring_ib_begin(ring);
+
+   if (job && ring->funcs->emit_gfx_shadow)
+   amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va,
+   init_shadow, vmid);
+
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
uint64_tuf_addr;
uint64_tuf_sequence;
 
+   /* virtual addresses for shadow/GDS/CSA */
+   uint64_tshadow_va;
+   uint64_tcsa_va;
+   uint64_tgds_va;
+   bool

[PATCH 01/11] drm/amdgpu/gfx11: add FW version check for new CP GFX shadow feature

2023-03-20 Thread Alex Deucher
Use this to determine if we support the new SET_Q_PREEMPTION_MODE
packet.

Signed-off-by: Alex Deucher 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 13 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index de9e7a00bb15..4ad9e225d6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -364,6 +364,8 @@ struct amdgpu_gfx {
 
struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
struct amdgpu_ring_mux  muxer;
+
+   boolcp_gfx_shadow; /* for gfx11 */
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) 
(adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index ecf8ceb53311..a0d830dc0d01 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -463,6 +463,18 @@ static int gfx_v11_0_init_toc_microcode(struct 
amdgpu_device *adev, const char *
return err;
 }
 
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
+{
+   switch (adev->ip_versions[GC_HWIP][0]) {
+   case IP_VERSION(11, 0, 0):
+   case IP_VERSION(11, 0, 2):
+   case IP_VERSION(11, 0, 3):
+   default:
+   adev->gfx.cp_gfx_shadow = false;
+   break;
+   }
+}
+
 static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
 {
char fw_name[40];
@@ -539,6 +551,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
 
+   gfx_v11_0_check_fw_cp_gfx_shadow(adev);
 out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
-- 
2.39.2



[PATCH 03/11] drm/amdgpu/UAPI: add new CS chunk for GFX shadow buffers

2023-03-20 Thread Alex Deucher
For GFX11, the UMD needs to allocate some shadow buffers
to be used for preemption.  The UMD allocates the buffers
and passes the GPU virtual address to the kernel since the
kernel will program the packet that specified these
addresses as part of its IB submission frame.

v2: UMD passes shadow init to tell kernel when to initialize
the shadow

Signed-off-by: Alex Deucher 
---
 include/uapi/drm/amdgpu_drm.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index b6eb90df5d05..3d9474af6566 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -592,6 +592,7 @@ struct drm_amdgpu_gem_va {
 #define AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES 0x07
 #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT0x08
 #define AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL  0x09
+#define AMDGPU_CHUNK_ID_CP_GFX_SHADOW   0x0a
 
 struct drm_amdgpu_cs_chunk {
__u32   chunk_id;
@@ -708,6 +709,15 @@ struct drm_amdgpu_cs_chunk_data {
};
 };
 
+#define AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW 0x1
+
+struct drm_amdgpu_cs_chunk_cp_gfx_shadow {
+   __u64 shadow_va;
+   __u64 csa_va;
+   __u64 gds_va;
+   __u64 flags;
+};
+
 /*
  *  Query h/w info: Flag that this is integrated (a.h.a. fusion) GPU
  *
-- 
2.39.2



[PATCH V2 00/11] Enable FW assisted shadowing for GFX11

2023-03-20 Thread Alex Deucher
This patch set allows for FW assisted shadowing on supported
platforms.  A new enough CP FW is required.  This feature is
required for mid command buffer preemption and proper SR-IOV
support.  This also simplifies the UMDs by allowing persistent
hardware state when the command submission executes.  UMDs
that use this will have their state retained across command
submissions.

The mesa MR to implement the user mode side of this is:
https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21986

v2: Integrate feedback from V1.  UMDs now need to specify init_shadow
on first use.

Alex Deucher (9):
  drm/amdgpu/gfx11: add FW version check for new CP GFX shadow feature
  drm/amdgpu/gfx11: check the CP FW version CP GFX shadow support
  drm/amdgpu/UAPI: add new CS chunk for GFX shadow buffers
  drm/amdgpu: don't require a job for cond_exec and shadow
  drm/amdgpu: add UAPI to query GFX shadow sizes
  drm/amdgpu: add gfx shadow callback
  drm/amdgpu: add get_gfx_shadow_info callback for gfx11
  drm/amdgpu: add support for new GFX shadow size query
  drm/amdgpu: bump driver version number for CP GFX shadow

Christian König (2):
  drm/amdgpu: add gfx shadow CS IOCTL support
  drm/amdgpu: add gfx11 emit shadow callback

 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 26 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  |  3 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h  | 14 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   | 19 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c  | 26 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  3 +
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c   | 74 
 drivers/gpu/drm/amd/amdgpu/nvd.h |  5 +-
 include/uapi/drm/amdgpu_drm.h| 20 +++
 10 files changed, 192 insertions(+), 4 deletions(-)

-- 
2.39.2



Re: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Christian König

Ah, yes! GART doesn't need to be ready to make a GTT allocation.

When GART becomes ready it will be filled with all the buffers which 
were allocated before it was ready.


So this is perfectly fine.

Thanks,
Christian.

Am 20.03.23 um 18:24 schrieb Kim, Jonathan:

[Public]

This was a long time ago but I think we agreed allocation was ok before GART 
was ready.
IIRC, there was also some mentioned related scenario where APUs needed to work 
without VRAM but allocations were required (but I don't know the details 
regarding that).
I vaguely remember the requirement for GART readiness for the bounce buffer 
allocation caused some problems elsewhere.
Are there problems observed with the bounce buffer being allocated without GART 
readiness?

Thanks,

Jon

-Original Message-
From: Christian König 
Sent: Monday, March 20, 2023 1:02 PM
To: Quan, Evan ; Kim, Jonathan
; amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix ; Koenig, Christian

Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access performance
using sdma

Caution: This message originated from an External Source. Use proper
caution when opening attachments, clicking links, or responding.


I don't think so. Have we recently re-ordered something here?

Christian.

Am 20.03.23 um 08:05 schrieb Quan, Evan:

[AMD Official Use Only - General]

I happened to find the sdma_access_bo allocation from GTT seems

performing before gart is ready.

That makes the "amdgpu_gart_map" is skipped since adev->gart.ptr is still

NULL.

Is that done intentionally ?

Evan

-Original Message-
From: amd-gfx  On Behalf Of
Jonathan Kim
Sent: Wednesday, January 5, 2022 3:12 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix ; Kim, Jonathan
; Koenig, Christian



Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
using sdma

For better performance during VRAM access for debugged processes, do
read/write copies over SDMA.

In order to fulfill post mortem debugging on a broken device, fallback to
stable MMIO access when gpu recovery is disabled or when job

submission

time outs are set to max.  Failed SDMA access should automatically fall
back to MMIO access.

Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
page-table updates and TLB flushes on access.

Signed-off-by: Jonathan Kim 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
+
   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
   2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 367abed1d6e6..512df4c09772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -48,6 +48,7 @@
   #include 

   #include 
+#include 

   #include "amdgpu.h"
   #include "amdgpu_object.h"
@@ -1429,6 +1430,70 @@ static void

amdgpu_ttm_vram_mm_access(struct

amdgpu_device *adev, loff_t pos,
  }
   }

+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
*bo,
+unsigned long offset, void *buf, int
len, int write)
+{
+struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+struct amdgpu_job *job;
+struct dma_fence *fence;
+uint64_t src_addr, dst_addr;
+unsigned int num_dw;
+int r, idx;
+
+if (len != PAGE_SIZE)
+return -EINVAL;
+
+if (!adev->mman.sdma_access_ptr)
+return -EACCES;
+
+r = drm_dev_enter(adev_to_drm(adev), &idx);
+if (r)
+return r;
+
+if (write)
+memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
AMDGPU_IB_POOL_DELAYED, &job);
+if (r)
+goto out;
+
+src_addr = write ? amdgpu_bo_gpu_offset(adev-

mman.sdma_access_bo) :

+amdgpu_bo_gpu_offset(abo);
+dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
+amdgpu_bo_gpu_offset(adev-

mman.sdma_access_bo);

+amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
PAGE_SIZE, false);
+
+amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+WARN_ON(job->ibs[0].length_dw > num_dw);
+
+r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+if (r) {
+amdgpu_job_free(job);
+goto out;
+}
+
+if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+r = -ETIMEDOUT;
+dma_fence_put(fence);
+
+if (!(r || write))
+memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+drm_dev_exit(idx);
+return r;
+}
+
+static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
amdgpu_device *adev)
+{
+return amdgpu_gpu_recovery == 0 ||
+adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+adev->sdma_timeou

RE: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Kim, Jonathan
[Public]

This was a long time ago but I think we agreed allocation was ok before GART 
was ready.
IIRC, there was also some mentioned related scenario where APUs needed to work 
without VRAM but allocations were required (but I don't know the details 
regarding that).
I vaguely remember the requirement for GART readiness for the bounce buffer 
allocation caused some problems elsewhere.
Are there problems observed with the bounce buffer being allocated without GART 
readiness?

Thanks,

Jon
> -Original Message-
> From: Christian König 
> Sent: Monday, March 20, 2023 1:02 PM
> To: Quan, Evan ; Kim, Jonathan
> ; amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Koenig, Christian
> 
> Subject: Re: [PATCH] drm/amdgpu: improve debug VRAM access performance
> using sdma
>
> Caution: This message originated from an External Source. Use proper
> caution when opening attachments, clicking links, or responding.
>
>
> I don't think so. Have we recently re-ordered something here?
>
> Christian.
>
> Am 20.03.23 um 08:05 schrieb Quan, Evan:
> > [AMD Official Use Only - General]
> >
> > I happened to find the sdma_access_bo allocation from GTT seems
> performing before gart is ready.
> > That makes the "amdgpu_gart_map" is skipped since adev->gart.ptr is still
> NULL.
> > Is that done intentionally ?
> >
> > Evan
> >> -Original Message-
> >> From: amd-gfx  On Behalf Of
> >> Jonathan Kim
> >> Sent: Wednesday, January 5, 2022 3:12 AM
> >> To: amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix ; Kim, Jonathan
> >> ; Koenig, Christian
> 
> >> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
> >> using sdma
> >>
> >> For better performance during VRAM access for debugged processes, do
> >> read/write copies over SDMA.
> >>
> >> In order to fulfill post mortem debugging on a broken device, fallback to
> >> stable MMIO access when gpu recovery is disabled or when job
> submission
> >> time outs are set to max.  Failed SDMA access should automatically fall
> >> back to MMIO access.
> >>
> >> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
> >> page-table updates and TLB flushes on access.
> >>
> >> Signed-off-by: Jonathan Kim 
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
> >> +
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
> >>   2 files changed, 82 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> index 367abed1d6e6..512df4c09772 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> >> @@ -48,6 +48,7 @@
> >>   #include 
> >>
> >>   #include 
> >> +#include 
> >>
> >>   #include "amdgpu.h"
> >>   #include "amdgpu_object.h"
> >> @@ -1429,6 +1430,70 @@ static void
> amdgpu_ttm_vram_mm_access(struct
> >> amdgpu_device *adev, loff_t pos,
> >>  }
> >>   }
> >>
> >> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
> >> *bo,
> >> +unsigned long offset, void *buf, int
> >> len, int write)
> >> +{
> >> +struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> >> +struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> >> +struct amdgpu_job *job;
> >> +struct dma_fence *fence;
> >> +uint64_t src_addr, dst_addr;
> >> +unsigned int num_dw;
> >> +int r, idx;
> >> +
> >> +if (len != PAGE_SIZE)
> >> +return -EINVAL;
> >> +
> >> +if (!adev->mman.sdma_access_ptr)
> >> +return -EACCES;
> >> +
> >> +r = drm_dev_enter(adev_to_drm(adev), &idx);
> >> +if (r)
> >> +return r;
> >> +
> >> +if (write)
> >> +memcpy(adev->mman.sdma_access_ptr, buf, len);
> >> +
> >> +num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> >> +r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
> >> AMDGPU_IB_POOL_DELAYED, &job);
> >> +if (r)
> >> +goto out;
> >> +
> >> +src_addr = write ? amdgpu_bo_gpu_offset(adev-
> >>> mman.sdma_access_bo) :
> >> +amdgpu_bo_gpu_offset(abo);
> >> +dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
> >> +amdgpu_bo_gpu_offset(adev-
> >>> mman.sdma_access_bo);
> >> +amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
> >> PAGE_SIZE, false);
> >> +
> >> +amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
> >> +WARN_ON(job->ibs[0].length_dw > num_dw);
> >> +
> >> +r = amdgpu_job_submit(job, &adev->mman.entity,
> >> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> >> +if (r) {
> >> +amdgpu_job_free(job);
> >> +goto out;
> >> +}
> >> +
> >> +if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
> >> +r = -ETIMEDOUT;
> >> +dma_fence_put(fence);
> >> +
> >> +if (!(r || write))
> >> +memcpy(buf, adev->mman.sdma_access_ptr, len);
> >> +out:
> >> +drm_dev_exit(idx);
>

[linux-next:master] BUILD REGRESSION 73f2c2a7e1d2b31fdd5faa6dfa151c437a6c0a5a

2023-03-20 Thread kernel test robot
tree/branch: 
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git master
branch HEAD: 73f2c2a7e1d2b31fdd5faa6dfa151c437a6c0a5a  Add linux-next specific 
files for 20230320

Error/Warning reports:

https://lore.kernel.org/oe-kbuild-all/202303081807.lblwkmpx-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303151409.por0sbf7-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303161404.ormfcy09-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303161521.jbgbafjj-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303171300.g6uem0x9-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303171506.af2gnuda-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303190142.tjyypbba-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303201615.qfu18nwv-...@intel.com
https://lore.kernel.org/oe-kbuild-all/202303202113.o9pagjgq-...@intel.com

Error/Warning: (recently discovered and may have been fixed)

drivers/gpu/drm/amd/amdgpu/../display/amdgpu_dm/amdgpu_dm_mst_types.c:211:6: 
warning: no previous prototype for 'is_synaptics_cascaded_panamera' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_validation.c:258:10: 
warning: no previous prototype for 'link_timing_bandwidth_kbps' 
[-Wmissing-prototypes]
drivers/gpu/drm/amd/amdgpu/../display/dc/link/protocols/link_dp_capability.c:2184:
 warning: expecting prototype for Check if there is a native DP or passive 
DP(). Prototype was for dp_is_sink_present() instead
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:411:11: error: call to undeclared function 
'devm_drm_of_get_bridge'; ISO C99 and later do not support implicit function 
declarations [-Wimplicit-function-declaration]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:411:9: error: incompatible integer to 
pointer conversion assigning to 'struct drm_bridge *' from 'int' 
[-Wint-conversion]
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:61: error: use of undeclared identifier 
'DRM_BRIDGE_ATTACH_NO_CONNECTOR'
drivers/gpu/drm/imx/lcdc/imx-lcdc.c:449:8: error: call to undeclared function 
'drm_bridge_attach'; ISO C99 and later do not support implicit function 
declarations [-Wimplicit-function-declaration]
drivers/net/wireless/legacy/ray_cs.c:628:17: warning: 'strncpy' specified bound 
32 equals destination size [-Wstringop-truncation]
include/linux/compiler_types.h:338:27: error: expression in static assertion is 
not an integer
include/linux/compiler_types.h:340:27: error: expression in static assertion is 
not an integer
include/linux/container_of.h:20:54: error: invalid use of undefined type 
'struct module'
include/linux/mmzone.h:1749:2: error: #error Allocator MAX_ORDER exceeds 
SECTION_SIZE
include/linux/rculist.h:392:21: error: invalid use of undefined type 'struct 
module'
include/linux/stddef.h:16:33: error: invalid use of undefined type 'struct 
module'
kernel/bpf/../module/internal.h:205:2: error: assigning to 'struct module *' 
from incompatible type 'void'
kernel/bpf/../module/internal.h:205:2: error: incomplete definition of type 
'struct module'
kernel/bpf/../module/internal.h:205:2: error: offsetof of incomplete type 
'typeof (*mod)' (aka 'struct module')
kernel/bpf/../module/internal.h:205:2: error: operand of type 'void' where 
arithmetic or pointer type is required
kernel/bpf/../module/internal.h:212:2: error: assigning to 'struct module *' 
from incompatible type 'void'
kernel/bpf/../module/internal.h:212:2: error: incomplete definition of type 
'struct module'
kernel/bpf/../module/internal.h:212:2: error: offsetof of incomplete type 
'typeof (*mod)' (aka 'struct module')
kernel/bpf/../module/internal.h:212:2: error: operand of type 'void' where 
arithmetic or pointer type is required
loongarch64-linux-ld: clk-mt8173-apmixedsys.c:(.text+0x104): undefined 
reference to `mtk_clk_register_pllfhs'

Unverified Error/Warning (likely false positive, please contact us if 
interested):

drivers/soc/fsl/qe/tsa.c:140:26: sparse: sparse: incorrect type in argument 2 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:150:27: sparse: sparse: incorrect type in argument 1 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:189:26: sparse: sparse: dereference of noderef 
expression
drivers/soc/fsl/qe/tsa.c:663:22: sparse: sparse: incorrect type in assignment 
(different address spaces)
drivers/soc/fsl/qe/tsa.c:673:21: sparse: sparse: incorrect type in assignment 
(different address spaces)
include/linux/gpio/consumer.h: linux/err.h is included more than once.
include/linux/gpio/driver.h: asm/bug.h is included more than once.
io_uring/io_uring.c:432 io_prep_async_work() error: we previously assumed 
'req->file' could be null (see line 425)
io_uring/kbuf.c:221 __io_remove_buffers() warn: variable dereferenced before 
check 'b

Re: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Christian König

I don't think so. Have we recently re-ordered something here?

Christian.

Am 20.03.23 um 08:05 schrieb Quan, Evan:

[AMD Official Use Only - General]

I happened to find that the sdma_access_bo allocation from GTT seems to be performed
before gart is ready.
That causes "amdgpu_gart_map" to be skipped since adev->gart.ptr is still NULL.
Is that done intentionally ?

Evan

-Original Message-
From: amd-gfx  On Behalf Of
Jonathan Kim
Sent: Wednesday, January 5, 2022 3:12 AM
To: amd-gfx@lists.freedesktop.org
Cc: Kuehling, Felix ; Kim, Jonathan
; Koenig, Christian 
Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
using sdma

For better performance during VRAM access for debugged processes, do
read/write copies over SDMA.

In order to fulfill post mortem debugging on a broken device, fallback to
stable MMIO access when gpu recovery is disabled or when job submission
timeouts are set to max.  Failed SDMA access should automatically fall
back to MMIO access.

Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
page-table updates and TLB flushes on access.

Signed-off-by: Jonathan Kim 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
+
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
  2 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 367abed1d6e6..512df4c09772 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -48,6 +48,7 @@
  #include 

  #include 
+#include 

  #include "amdgpu.h"
  #include "amdgpu_object.h"
@@ -1429,6 +1430,70 @@ static void amdgpu_ttm_vram_mm_access(struct
amdgpu_device *adev, loff_t pos,
}
  }

+static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
*bo,
+   unsigned long offset, void *buf, int
len, int write)
+{
+   struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
+   struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
+   struct amdgpu_job *job;
+   struct dma_fence *fence;
+   uint64_t src_addr, dst_addr;
+   unsigned int num_dw;
+   int r, idx;
+
+   if (len != PAGE_SIZE)
+   return -EINVAL;
+
+   if (!adev->mman.sdma_access_ptr)
+   return -EACCES;
+
+   r = drm_dev_enter(adev_to_drm(adev), &idx);
+   if (r)
+   return r;
+
+   if (write)
+   memcpy(adev->mman.sdma_access_ptr, buf, len);
+
+   num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
+   r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
AMDGPU_IB_POOL_DELAYED, &job);
+   if (r)
+   goto out;
+
+   src_addr = write ? amdgpu_bo_gpu_offset(adev-

mman.sdma_access_bo) :

+   amdgpu_bo_gpu_offset(abo);
+   dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
+   amdgpu_bo_gpu_offset(adev-

mman.sdma_access_bo);

+   amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
PAGE_SIZE, false);
+
+   amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
+   WARN_ON(job->ibs[0].length_dw > num_dw);
+
+   r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
+   if (r) {
+   amdgpu_job_free(job);
+   goto out;
+   }
+
+   if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
+   r = -ETIMEDOUT;
+   dma_fence_put(fence);
+
+   if (!(r || write))
+   memcpy(buf, adev->mman.sdma_access_ptr, len);
+out:
+   drm_dev_exit(idx);
+   return r;
+}
+
+static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
amdgpu_device *adev)
+{
+   return amdgpu_gpu_recovery == 0 ||
+   adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
+   adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
+}
+
  /**
   * amdgpu_ttm_access_memory - Read or Write memory that backs a
buffer object.
   *
@@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
ttm_buffer_object *bo,
if (bo->resource->mem_type != TTM_PL_VRAM)
return -EIO;

+   if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
+   !amdgpu_ttm_access_memory_sdma(bo, offset, buf,
len, write))
+   return len;
+
amdgpu_res_first(bo->resource, offset, len, &cursor);
while (cursor.remaining) {
size_t count, size = cursor.size;
@@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
return r;
}

+   if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
+   AMDGPU_GEM_DOMAIN_GTT,
+   &adev->mman.sdma_access_bo, NULL,
+   adev->mman.sdma_access_ptr))
+   DRM_WARN("

Re: [PATCH v2] drm/amdgpu: add print for iommu translation mode

2023-03-20 Thread Felix Kuehling

On 2023-03-20 10:08, Graham Sider wrote:

Add log to display whether RAM is direct vs DMA mapped.

Signed-off-by: Graham Sider 


Acked-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3bd6c5aef796..83774824694b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3528,8 +3528,12 @@ static void amdgpu_device_check_iommu_direct_map(struct 
amdgpu_device *adev)
struct iommu_domain *domain;
  
  	domain = iommu_get_domain_for_dev(adev->dev);

-   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
+   dev_info(adev->dev, "RAM is direct mapped to GPU (not translated by 
IOMMU)\n");
adev->ram_is_direct_mapped = true;
+   } else {
+   dev_info(adev->dev, "RAM is DMA mapped to GPU (translated by 
IOMMU)\n");
+   }
  }
  
  static const struct attribute *amdgpu_dev_attributes[] = {


Re: [PATCH 04/10] drm/amdgpu: add gfx11 emit shadow callback

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:49 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > From: Christian König 
> >
> > Add ring callback for gfx to update the CP firmware
> > with the new shadow information before we process the
> > IB.
> >
> > v2: add implementation for new packet (Alex)
> > v3: add current FW version checks (Alex)
> > v4: only initialize shadow on first use
> >  Only set IB_VMID when a valid shadow buffer is present
> >  (Alex)
> >
> > Signed-off-by: Christian König 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 46 +
> >   drivers/gpu/drm/amd/amdgpu/nvd.h|  5 ++-
> >   3 files changed, 52 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index de9e7a00bb15..4ad9e225d6e6 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -364,6 +364,8 @@ struct amdgpu_gfx {
> >
> >   struct amdgpu_ring  sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
> >   struct amdgpu_ring_mux  muxer;
> > +
> > + boolcp_gfx_shadow; /* for gfx11 */
> >   };
> >
> >   #define amdgpu_gfx_get_gpu_clock_counter(adev) 
> > (adev)->gfx.funcs->get_gpu_clock_counter((adev))
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > index 3bf697a80cf2..166a3f640042 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
> > @@ -463,6 +463,27 @@ static int gfx_v11_0_init_toc_microcode(struct 
> > amdgpu_device *adev, const char *
> >   return err;
> >   }
> >
> > +static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
> > +{
> > + switch (adev->ip_versions[GC_HWIP][0]) {
> > + case IP_VERSION(11, 0, 0):
> > + case IP_VERSION(11, 0, 2):
> > + case IP_VERSION(11, 0, 3):
> > + /* XXX fix me! */
> > + if ((adev->gfx.me_fw_version >= 1498) &&
> > + (adev->gfx.me_feature_version >= 29) &&
> > + (adev->gfx.pfp_fw_version >= 1541) &&
> > + (adev->gfx.pfp_feature_version >= 29) &&
> > + (adev->gfx.mec_fw_version >= 507) &&
> > + (adev->gfx.mec_feature_version >= 29))
> > + adev->gfx.cp_gfx_shadow = true;
> > + break;
> > + default:
> > + adev->gfx.cp_gfx_shadow = false;
> > + break;
> > + }
> > +}
> > +
> >   static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
> >   {
> >   char fw_name[40];
> > @@ -539,6 +560,7 @@ static int gfx_v11_0_init_microcode(struct 
> > amdgpu_device *adev)
> >   /* only one MEC for gfx 11.0.0. */
> >   adev->gfx.mec2_fw = NULL;
> >
> > + gfx_v11_0_check_fw_cp_gfx_shadow(adev);
> >   out:
> >   if (err) {
> >   amdgpu_ucode_release(&adev->gfx.pfp_fw);
> > @@ -5563,6 +5585,28 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct 
> > amdgpu_ring *ring,
> >   amdgpu_ring_write(ring, 0);
> >   }
> >
> > +static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
> > +struct amdgpu_job *job)
>
> Better give the values to use here instead of the job structure.

Will fix it up.  Thanks!

Alex

>
> Regards,
> Christian.
>
> > +{
> > + unsigned vmid = AMDGPU_JOB_GET_VMID(job);
> > + struct amdgpu_device *adev = ring->adev;
> > +
> > + if (!adev->gfx.cp_gfx_shadow)
> > + return;
> > +
> > + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
> > + amdgpu_ring_write(ring, lower_32_bits(job->shadow_va));
> > + amdgpu_ring_write(ring, upper_32_bits(job->shadow_va));
> > + amdgpu_ring_write(ring, lower_32_bits(job->gds_va));
> > + amdgpu_ring_write(ring, upper_32_bits(job->gds_va));
> > + amdgpu_ring_write(ring, lower_32_bits(job->csa_va));
> > + amdgpu_ring_write(ring, upper_32_bits(job->csa_va));
> > + amdgpu_ring_write(ring, job->shadow_va ?
> > +   PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
> > + amdgpu_ring_write(ring, job->init_shadow ?
> > +   PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
> > +}
> > +
> >   static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring 
> > *ring)
> >   {
> >   unsigned ret;
> > @@ -6183,6 +6227,7 @@ static const struct amdgpu_ring_funcs 
> > gfx_v11_0_ring_funcs_gfx = {
> >   .set_wptr = gfx_v11_0_ring_set_wptr_gfx,
> >   .emit_frame_size = /* totally 242 maximum if 16 IBs */
> >   5 + /* COND_EXEC */
> > + 9 + /* SET_Q_PREEMPTION_MODE */
> >   7 + /* PIPELINE_SYNC */
> >   SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
> >   SOC15_FLUSH_GPU_TLB_NUM_

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 20.03.23 um 17:01 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:55 AM Christian König
 wrote:

Am 20.03.23 um 16:49 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
   Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
   amdgpu_cs_pass1()
   Only initialize shadow on first use
   (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;

default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
return 0;
}

+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ bool shadow_initialized = false;
+ int i;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;

Do we really need all three VAs separately?


+ if (!p->ctx->shadow_initialized) {
+ p->jobs[i]->init_shadow = true;
+ shadow_initialized = true;
+ }
+ }
+ if (shadow_initialized)
+ p->ctx->shadow_initialized = true;

This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Yes.  We have to only initialize the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.

I don't get what you mean with that? This here doesn't make any sense at
all.

The shadow buffer addresses must be given with every CS and updated over
and over again. Otherwise this solution won't work correctly.

The shadow replaces the old SET/LOAD model.  When the UMD uses the
shadow buffer, they no longer have to send all of their state anymore
in the IB.  The CP FW will automatically load whatever was saved when
it processes this packet.  However, the shadow needs to be initialized
by the CP FW the first time it is used.  All subsequent times, it will
just be a save/restore by the FW.  I guess the alternative would be
for the UMD to specify if it wants initialization or not, but tracking
it in the kernel aligns better with the user mode queue model where
this is handled by the MQD and is initialized the first time the queue
is loaded.


This approach is just utterly nonsense.

The problem is that neither the kernel nor the fw can know if that's the 
first submission. Only the UMD can know that.


That's the same issue we previously had with PAL and the old model which 
didn't worked at all.


Christian.



Alex


Christian.


Alex


Regards,
Christian.



+}
+
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ amdgpu_cs_p2_shadow(p, chunk);
+ break;
}
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long   ras_counter_ce;
unsigned long   ras_counter_ue;
uint32_tstable_pstate;
+ boolshadow_initialized;
};

struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348

Re: [PATCH 07/10] drm/amdgpu: add gfx shadow callback

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:58 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > To provide IP specific shadow sizes.  UMDs will use
> > this to query the kernel driver for the size of the
> > shadow buffers.
> >
> > v2: make callback return an int (Alex)
> >
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 12 
> >   1 file changed, 12 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > index 4ad9e225d6e6..8826f1efc75f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
> > @@ -219,6 +219,15 @@ struct amdgpu_gfx_ras {
> >   struct amdgpu_iv_entry 
> > *entry);
> >   };
> >
> > +struct amdgpu_gfx_shadow_info {
> > + u32 shadow_size;
> > + u32 shadow_alignment;
> > + u32 csa_size;
> > + u32 csa_alignment;
> > + u32 gds_size;
> > + u32 gds_alignment;
>
> I don't think we need an alignment for those. Otherwise we would run
> into problems with the VA mappings as well.

This is for the GDS save area so it's just memory.  The size is just
the amount of GDS on the ASIC and the alignment is standard 256B like
most other CP things.

Alex

>
> Regards,
> Christian.
>
> > +};
> > +
> >   struct amdgpu_gfx_funcs {
> >   /* get the gpu clock counter */
> >   uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
> > @@ -236,6 +245,8 @@ struct amdgpu_gfx_funcs {
> >u32 queue, u32 vmid);
> >   void (*init_spm_golden)(struct amdgpu_device *adev);
> >   void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
> > + int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
> > +struct amdgpu_gfx_shadow_info 
> > *shadow_info);
> >   };
> >
> >   struct sq_work {
> > @@ -372,6 +383,7 @@ struct amdgpu_gfx {
> >   #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) 
> > (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
> >   #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) 
> > (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
> >   #define amdgpu_gfx_init_spm_golden(adev) 
> > (adev)->gfx.funcs->init_spm_golden((adev))
> > +#define amdgpu_gfx_get_gfx_shadow_info(adev, si) 
> > (adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))
> >
> >   /**
> >* amdgpu_gfx_create_bitmask - create a bitmask
>


Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:55 AM Christian König
 wrote:
>
> Am 20.03.23 um 16:49 schrieb Alex Deucher:
> > On Mon, Mar 20, 2023 at 11:46 AM Christian König
> >  wrote:
> >> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> >>> From: Christian König 
> >>>
> >>> Add support for submitting the shadow update packet
> >>> when submitting an IB.  Needed for MCBP on GFX11.
> >>>
> >>> v2: update API for CSA (Alex)
> >>> v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
> >>>   Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >>>   amdgpu_cs_pass1()
> >>>   Only initialize shadow on first use
> >>>   (Alex)
> >>>
> >>> Signed-off-by: Christian König 
> >>> Signed-off-by: Alex Deucher 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >>>5 files changed, 37 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> index f6144b378617..9bdda246b09c 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >>>case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >>>case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >>>case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> >>> + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >>>break;
> >>>
> >>>default:
> >>> @@ -587,6 +588,26 @@ static int 
> >>> amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
> >>>return 0;
> >>>}
> >>>
> >>> +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> >>> + struct amdgpu_cs_chunk *chunk)
> >>> +{
> >>> + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> >>> + bool shadow_initialized = false;
> >>> + int i;
> >>> +
> >>> + for (i = 0; i < p->gang_size; ++i) {
> >>> + p->jobs[i]->shadow_va = shadow->shadow_va;
> >>> + p->jobs[i]->csa_va = shadow->csa_va;
> >>> + p->jobs[i]->gds_va = shadow->gds_va;
> >> Do we really need all three VAs separately?
> >>
> >>> + if (!p->ctx->shadow_initialized) {
> >>> + p->jobs[i]->init_shadow = true;
> >>> + shadow_initialized = true;
> >>> + }
> >>> + }
> >>> + if (shadow_initialized)
> >>> + p->ctx->shadow_initialized = true;
> >> This is a really bad idea since the IOCTL can be interrupted later on.
> >>
> >> Why do we need that?
> > Yes.  We have to only initialize the buffer the first time it's used.
> > Doing it again will overwrite whatever userspace has done with it
> > since then.
>
> I don't get what you mean with that? This here doesn't make any sense at
> all.
>
> The shadow buffer addresses must be given with every CS and updated over
> and over again. Otherwise this solution won't work correctly.

The shadow replaces the old SET/LOAD model.  When the UMD uses the
shadow buffer, they no longer have to send all of their state anymore
in the IB.  The CP FW will automatically load whatever was saved when
it processes this packet.  However, the shadow needs to be initialized
by the CP FW the first time it is used.  All subsequent times, it will
just be a save/restore by the FW.  I guess the alternative would be
for the UMD to specify if it wants initialization or not, but tracking
it in the kernel aligns better with the user mode queue model where
this is handled by the MQD and is initialized the first time the queue
is loaded.

Alex

>
> Christian.
>
> >
> > Alex
> >
> >> Regards,
> >> Christian.
> >>
> >>
> >>> +}
> >>> +
> >>>static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >>>{
> >>>unsigned int ce_preempt = 0, de_preempt = 0;
> >>> @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >>>if (r)
> >>>return r;
> >>>break;
> >>> + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >>> + amdgpu_cs_p2_shadow(p, chunk);
> >>> + break;
> >>>}
> >>>}
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> index 0fa0e56daf67..909d188c41f2 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >>>unsigned long   ras_counter_ce;
> >>>unsigned long   ras_counter_ue;
> >>>uint32_t 

Re: [PATCH 07/10] drm/amdgpu: add gfx shadow callback

2023-03-20 Thread Christian König

Am 17.03.23 um 18:17 schrieb Alex Deucher:

To provide IP specific shadow sizes.  UMDs will use
this to query the kernel driver for the size of the
shadow buffers.

v2: make callback return an int (Alex)

Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 12 
  1 file changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 4ad9e225d6e6..8826f1efc75f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -219,6 +219,15 @@ struct amdgpu_gfx_ras {
struct amdgpu_iv_entry *entry);
  };
  
+struct amdgpu_gfx_shadow_info {

+   u32 shadow_size;
+   u32 shadow_alignment;
+   u32 csa_size;
+   u32 csa_alignment;
+   u32 gds_size;
+   u32 gds_alignment;


I don't think we need an alignment for those. Otherwise we would run 
into problems with the VA mappings as well.


Regards,
Christian.


+};
+
  struct amdgpu_gfx_funcs {
/* get the gpu clock counter */
uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
@@ -236,6 +245,8 @@ struct amdgpu_gfx_funcs {
 u32 queue, u32 vmid);
void (*init_spm_golden)(struct amdgpu_device *adev);
void (*update_perfmon_mgcg)(struct amdgpu_device *adev, bool enable);
+   int (*get_gfx_shadow_info)(struct amdgpu_device *adev,
+  struct amdgpu_gfx_shadow_info *shadow_info);
  };
  
  struct sq_work {

@@ -372,6 +383,7 @@ struct amdgpu_gfx {
  #define amdgpu_gfx_select_se_sh(adev, se, sh, instance) 
(adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
  #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) 
(adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
  #define amdgpu_gfx_init_spm_golden(adev) 
(adev)->gfx.funcs->init_spm_golden((adev))
+#define amdgpu_gfx_get_gfx_shadow_info(adev, si) 
(adev)->gfx.funcs->get_gfx_shadow_info((adev), (si))
  
  /**

   * amdgpu_gfx_create_bitmask - create a bitmask




Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 20.03.23 um 16:49 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
  amdgpu_cs_pass1()
  Only initialize shadow on first use
  (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
   5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
   break;

   default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
   return 0;
   }

+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ bool shadow_initialized = false;
+ int i;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;

Do we really need all three VAs separately?


+ if (!p->ctx->shadow_initialized) {
+ p->jobs[i]->init_shadow = true;
+ shadow_initialized = true;
+ }
+ }
+ if (shadow_initialized)
+ p->ctx->shadow_initialized = true;

This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Yes.  We have to only initialize the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.


I don't get what you mean with that? This here doesn't make any sense at 
all.


The shadow buffer addresses must be given with every CS and updated over 
and over again. Otherwise this solution won't work correctly.


Christian.



Alex


Regards,
Christian.



+}
+
   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
   {
   unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
   if (r)
   return r;
   break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ amdgpu_cs_p2_shadow(p, chunk);
+ break;
   }
   }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
   unsigned long   ras_counter_ce;
   unsigned long   ras_counter_ue;
   uint32_tstable_pstate;
+ boolshadow_initialized;
   };

   struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348dbe2..d88964b9407f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
   }

   amdgpu_ring_ib_begin(ring);
+
+ if (job && ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, job);
+
   if (job && ring->funcs->init_cond_exec)
   patch_offset = amdgpu_ring_init_cond_exec(ring);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
   uint64_tuf_addr;
   uint64_tuf_sequence;

+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_tshadow_va;
+ uint64_tcsa_va;
+ uint64_tgds_va;
+ boolinit_shadow;
+
   /* job_run_

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > From: Christian König 
> >
> > Add support for submitting the shadow update packet
> > when submitting an IB.  Needed for MCBP on GFX11.
> >
> > v2: update API for CSA (Alex)
> > v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
> >  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >  amdgpu_cs_pass1()
> >  Only initialize shadow on first use
> >  (Alex)
> >
> > Signed-off-by: Christian König 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >   5 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f6144b378617..9bdda246b09c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >   break;
> >
> >   default:
> > @@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
> > amdgpu_cs_parser *p,
> >   return 0;
> >   }
> >
> > +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> > + struct amdgpu_cs_chunk *chunk)
> > +{
> > + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> > + bool shadow_initialized = false;
> > + int i;
> > +
> > + for (i = 0; i < p->gang_size; ++i) {
> > + p->jobs[i]->shadow_va = shadow->shadow_va;
> > + p->jobs[i]->csa_va = shadow->csa_va;
> > + p->jobs[i]->gds_va = shadow->gds_va;
>
> Do we really need all three VAs separately?

That is what the firmware uses.  Normally they are stored individually
as part of the MQD so this just matches that only on the fly for IBs.

Alex

>
> > + if (!p->ctx->shadow_initialized) {
> > + p->jobs[i]->init_shadow = true;
> > + shadow_initialized = true;
>
> > + }
> > + }
> > + if (shadow_initialized)
> > + p->ctx->shadow_initialized = true;
>
> This is a really bad idea since the IOCTL can be interrupted later on.
>
> Why do we need that?
>
> Regards,
> Christian.
>
>
> > +}
> > +
> >   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   {
> >   unsigned int ce_preempt = 0, de_preempt = 0;
> > @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   if (r)
> >   return r;
> >   break;
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> > + amdgpu_cs_p2_shadow(p, chunk);
> > + break;
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > index 0fa0e56daf67..909d188c41f2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >   unsigned long   ras_counter_ce;
> >   unsigned long   ras_counter_ue;
> >   uint32_tstable_pstate;
> > + boolshadow_initialized;
> >   };
> >
> >   struct amdgpu_ctx_mgr {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index b348dbe2..d88964b9407f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> > unsigned num_ibs,
> >   }
> >
> >   amdgpu_ring_ib_begin(ring);
> > +
> > + if (job && ring->funcs->emit_gfx_shadow)
> > + amdgpu_ring_emit_gfx_shadow(ring, job);
> > +
> >   if (job && ring->funcs->init_cond_exec)
> >   patch_offset = amdgpu_ring_init_cond_exec(ring);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > index 9790def34815..b470808fa40e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > @@ -68,6 +68,12 @@ struct amdgpu_job {
> >   uint64_tuf_addr;
> >   uint64_tuf_sequence;
> >
> > + /* virtual addresses for shadow/GDS/CSA */
> > + uint64_tshado

Re: [PATCH 06/10] drm/amdgpu: don't require a job for cond_exec and shadow

2023-03-20 Thread Christian König

Am 17.03.23 um 18:17 schrieb Alex Deucher:

We need to reset the shadow state every time we submit an
IB and there needs to be a COND_EXEC packet after the
SET_Q_PREEMPTION_MODE packet for it to work properly, so
we should emit both of these packets regardless of whether
there is a job present or not.

Signed-off-by: Alex Deucher 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index d88964b9407f..cd5b0df44ffb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -213,10 +213,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
  
  	amdgpu_ring_ib_begin(ring);
  
-	if (job && ring->funcs->emit_gfx_shadow)

+   if (ring->funcs->emit_gfx_shadow)
amdgpu_ring_emit_gfx_shadow(ring, job);
  
-	if (job && ring->funcs->init_cond_exec)

+   if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
  
  	amdgpu_device_flush_hdp(adev, ring);




Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > From: Christian König 
> >
> > Add support for submitting the shadow update packet
> > when submitting an IB.  Needed for MCBP on GFX11.
> >
> > v2: update API for CSA (Alex)
> > v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
> >  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >  amdgpu_cs_pass1()
> >  Only initialize shadow on first use
> >  (Alex)
> >
> > Signed-off-by: Christian König 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >   5 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f6144b378617..9bdda246b09c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >   break;
> >
> >   default:
> > @@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
> > amdgpu_cs_parser *p,
> >   return 0;
> >   }
> >
> > +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> > + struct amdgpu_cs_chunk *chunk)
> > +{
> > + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> > + bool shadow_initialized = false;
> > + int i;
> > +
> > + for (i = 0; i < p->gang_size; ++i) {
> > + p->jobs[i]->shadow_va = shadow->shadow_va;
> > + p->jobs[i]->csa_va = shadow->csa_va;
> > + p->jobs[i]->gds_va = shadow->gds_va;
>
> Do we really need all three VAs separately?
>
> > + if (!p->ctx->shadow_initialized) {
> > + p->jobs[i]->init_shadow = true;
> > + shadow_initialized = true;
>
> > + }
> > + }
> > + if (shadow_initialized)
> > + p->ctx->shadow_initialized = true;
>
> This is a really bad idea since the IOCTL can be interrupted later on.
>
> Why do we need that?

Yes.  We have to only initialize the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.

Alex

>
> Regards,
> Christian.
>
>
> > +}
> > +
> >   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   {
> >   unsigned int ce_preempt = 0, de_preempt = 0;
> > @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   if (r)
> >   return r;
> >   break;
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> > + amdgpu_cs_p2_shadow(p, chunk);
> > + break;
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > index 0fa0e56daf67..909d188c41f2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >   unsigned long   ras_counter_ce;
> >   unsigned long   ras_counter_ue;
> >   uint32_tstable_pstate;
> > + boolshadow_initialized;
> >   };
> >
> >   struct amdgpu_ctx_mgr {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index b348dbe2..d88964b9407f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> > unsigned num_ibs,
> >   }
> >
> >   amdgpu_ring_ib_begin(ring);
> > +
> > + if (job && ring->funcs->emit_gfx_shadow)
> > + amdgpu_ring_emit_gfx_shadow(ring, job);
> > +
> >   if (job && ring->funcs->init_cond_exec)
> >   patch_offset = amdgpu_ring_init_cond_exec(ring);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > index 9790def34815..b470808fa40e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > @@ -68,6 +68,12 @@ struct amdgpu_job {
> >   uint64_tuf_addr;
> >   uint64_tuf_sequence;
> >
> > + /* virtual addresses for shadow/GDS/CSA */
> > + uint64_ts

Re: [PATCH 04/10] drm/amdgpu: add gfx11 emit shadow callback

2023-03-20 Thread Christian König

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add ring callback for gfx to update the CP firmware
with the new shadow information before we process the
IB.

v2: add implementation for new packet (Alex)
v3: add current FW version checks (Alex)
v4: only initialize shadow on first use
 Only set IB_VMID when a valid shadow buffer is present
 (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h |  2 ++
  drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c  | 46 +
  drivers/gpu/drm/amd/amdgpu/nvd.h|  5 ++-
  3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index de9e7a00bb15..4ad9e225d6e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -364,6 +364,8 @@ struct amdgpu_gfx {
  
  	struct amdgpu_ring		sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];

struct amdgpu_ring_mux  muxer;
+
+   boolcp_gfx_shadow; /* for gfx11 */
  };
  
  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 3bf697a80cf2..166a3f640042 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -463,6 +463,27 @@ static int gfx_v11_0_init_toc_microcode(struct 
amdgpu_device *adev, const char *
return err;
  }
  
+static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)

+{
+   switch (adev->ip_versions[GC_HWIP][0]) {
+   case IP_VERSION(11, 0, 0):
+   case IP_VERSION(11, 0, 2):
+   case IP_VERSION(11, 0, 3):
+   /* XXX fix me! */
+   if ((adev->gfx.me_fw_version >= 1498) &&
+   (adev->gfx.me_feature_version >= 29) &&
+   (adev->gfx.pfp_fw_version >= 1541) &&
+   (adev->gfx.pfp_feature_version >= 29) &&
+   (adev->gfx.mec_fw_version >= 507) &&
+   (adev->gfx.mec_feature_version >= 29))
+   adev->gfx.cp_gfx_shadow = true;
+   break;
+   default:
+   adev->gfx.cp_gfx_shadow = false;
+   break;
+   }
+}
+
  static int gfx_v11_0_init_microcode(struct amdgpu_device *adev)
  {
char fw_name[40];
@@ -539,6 +560,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device 
*adev)
/* only one MEC for gfx 11.0.0. */
adev->gfx.mec2_fw = NULL;
  
+	gfx_v11_0_check_fw_cp_gfx_shadow(adev);

  out:
if (err) {
amdgpu_ucode_release(&adev->gfx.pfp_fw);
@@ -5563,6 +5585,28 @@ static void gfx_v11_0_ring_emit_cntxcntl(struct 
amdgpu_ring *ring,
amdgpu_ring_write(ring, 0);
  }
  
+static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,

+  struct amdgpu_job *job)


Better give the values to use here instead of the job structure.

Regards,
Christian.


+{
+   unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+   struct amdgpu_device *adev = ring->adev;
+
+   if (!adev->gfx.cp_gfx_shadow)
+   return;
+
+   amdgpu_ring_write(ring, PACKET3(PACKET3_SET_Q_PREEMPTION_MODE, 7));
+   amdgpu_ring_write(ring, lower_32_bits(job->shadow_va));
+   amdgpu_ring_write(ring, upper_32_bits(job->shadow_va));
+   amdgpu_ring_write(ring, lower_32_bits(job->gds_va));
+   amdgpu_ring_write(ring, upper_32_bits(job->gds_va));
+   amdgpu_ring_write(ring, lower_32_bits(job->csa_va));
+   amdgpu_ring_write(ring, upper_32_bits(job->csa_va));
+   amdgpu_ring_write(ring, job->shadow_va ?
+ PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(vmid) : 0);
+   amdgpu_ring_write(ring, job->init_shadow ?
+ PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
+}
+
  static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
  {
unsigned ret;
@@ -6183,6 +6227,7 @@ static const struct amdgpu_ring_funcs 
gfx_v11_0_ring_funcs_gfx = {
.set_wptr = gfx_v11_0_ring_set_wptr_gfx,
.emit_frame_size = /* totally 242 maximum if 16 IBs */
5 + /* COND_EXEC */
+   9 + /* SET_Q_PREEMPTION_MODE */
7 + /* PIPELINE_SYNC */
SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
@@ -6209,6 +6254,7 @@ static const struct amdgpu_ring_funcs 
gfx_v11_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
+   .emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
.preempt_ib = gfx_v11_0

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC
 Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
 amdgpu_cs_pass1()
 Only initialize shadow on first use
 (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
  5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
  
  		default:

@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
return 0;
  }
  
+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,

+   struct amdgpu_cs_chunk *chunk)
+{
+   struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+   bool shadow_initialized = false;
+   int i;
+
+   for (i = 0; i < p->gang_size; ++i) {
+   p->jobs[i]->shadow_va = shadow->shadow_va;
+   p->jobs[i]->csa_va = shadow->csa_va;
+   p->jobs[i]->gds_va = shadow->gds_va;


Do we really need all three VAs separately?


+   if (!p->ctx->shadow_initialized) {
+   p->jobs[i]->init_shadow = true;
+   shadow_initialized = true;



+   }
+   }
+   if (shadow_initialized)
+   p->ctx->shadow_initialized = true;


This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Regards,
Christian.



+}
+
  static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
  {
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+   amdgpu_cs_p2_shadow(p, chunk);
+   break;
}
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long   ras_counter_ce;
unsigned long   ras_counter_ue;
uint32_tstable_pstate;
+   boolshadow_initialized;
  };
  
  struct amdgpu_ctx_mgr {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348dbe2..d88964b9407f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
  
  	amdgpu_ring_ib_begin(ring);

+
+   if (job && ring->funcs->emit_gfx_shadow)
+   amdgpu_ring_emit_gfx_shadow(ring, job);
+
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h

index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
uint64_tuf_addr;
uint64_tuf_sequence;
  
+	/* virtual addresses for shadow/GDS/CSA */

+   uint64_tshadow_va;
+   uint64_tcsa_va;
+   uint64_tgds_va;
+   boolinit_shadow;
+
/* job_run_counter >= 1 means a resubmit job */
uint32_tjob_run_counter;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 3989e755a5b4..8643d4a92c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -212,6 +212,7 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);

Re: [PATCH 00/10] drm/radeon: Convert fbdev to DRM client

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:19 AM Thomas Zimmermann  wrote:
>
> Hi
>
> Am 20.03.23 um 16:11 schrieb Christian König:
> > Am 17.03.23 um 10:20 schrieb Thomas Zimmermann:
> >> Hi Christian
> >>
> >> Am 17.03.23 um 09:53 schrieb Christian König:
> >>> Am 16.03.23 um 10:37 schrieb Thomas Zimmermann:
>  Convert radeon's fbdev code to drm_client. Replaces the current
>  ad-hoc integration. The conversion includes a number of cleanups.
>  Only build fbdev support if the config option has been set.
> >>>
> >>> I'm torn apart on that. On the one hand it looks like a really nice
> >>> cleanup on the other hand we don't really want to touch radeon any more.
> >>
> >> It's a driver in the upstream kernel. You have to expect at least some
> >> changes.
> >
> > Some changes is not the problem, but we need a justification to change
> > something. Just that it's nice to have won't do it without extensive
> > testing.
> >
> >>
> >>>
> >>> Alex what do you think? Is that worth the risk of breaking stuff?
> >>
> >> Moving all fbdev emulation to struct drm_client is required for new
> >> in-kernel DRM clients, such as a DRM kernel logger or a boot splash.
> >
> > Well that's a rather good justification. I suggest to add that to the
> > cover-letter.
>
> Ok, will go into a possible v2. The mid-term plan is to convert the
> fbdev code in all remaining drivers to struct drm_client and remove the
> old ad-hoc callbacks.
>
> With struct drm_client, we can select in-kernel clients at compile time
> or runtime just like userspace clients. I guess, we can have a bootup
> screen and then switch to the console or the DRM logger. Or go from any
> client to the logger on kernel panics (or something like that). There's
> been occasional talk about userspace consoles, which would use such
> functionality.

Patches look good to me.  I have a pretty limited set of HW I can test
on since I don't have a functional AGP system anymore and most of my
older PCIe radeons are packed up in the attic.  Feel free to add my:
Reviewed-by: Alex Deucher 
to the series.

Alex

>
> Best regards
> Thomas
>
> >
> > Regards,
> > Christian.
> >
> >>
> >> Best regards
> >> Thomas
> >>
> >>>
> >>> Christian.
> >>>
> 
>  Thomas Zimmermann (10):
> drm/radeon: Move radeon_align_pitch() next to dumb-buffer helpers
> drm/radeon: Improve fbdev object-test helper
> drm/radeon: Remove struct radeon_fbdev
> drm/radeon: Remove test for !screen_base in fbdev probing
> drm/radeon: Move fbdev object helpers before struct fb_ops et al
> drm/radeon: Fix coding style in fbdev emulation
> drm/radeon: Move fbdev cleanup code into fb_destroy callback
> drm/radeon: Correctly clean up failed display probing
> drm/radeon: Implement client-based fbdev emulation
> drm/radeon: Only build fbdev if DRM_FBDEV_EMULATION is set
> 
>    drivers/gpu/drm/radeon/Makefile |   3 +-
>    drivers/gpu/drm/radeon/radeon.h |   2 +
>    drivers/gpu/drm/radeon/radeon_display.c |   4 -
>    drivers/gpu/drm/radeon/radeon_drv.c |   3 +-
>    drivers/gpu/drm/radeon/radeon_drv.h |   1 -
>    drivers/gpu/drm/radeon/radeon_fb.c  | 400 --
>    drivers/gpu/drm/radeon/radeon_fbdev.c   | 422
>  
>    drivers/gpu/drm/radeon/radeon_gem.c |  24 ++
>    drivers/gpu/drm/radeon/radeon_kms.c |  18 -
>    drivers/gpu/drm/radeon/radeon_mode.h|  20 +-
>    10 files changed, 464 insertions(+), 433 deletions(-)
>    delete mode 100644 drivers/gpu/drm/radeon/radeon_fb.c
>    create mode 100644 drivers/gpu/drm/radeon/radeon_fbdev.c
> 
> 
>  base-commit: ec0708e846b819c8d5b642de42448a87d7526564
> >>>
> >>
> >
>
> --
> Thomas Zimmermann
> Graphics Driver Developer
> SUSE Software Solutions Germany GmbH
> Maxfeldstr. 5, 90409 Nürnberg, Germany
> (HRB 36809, AG Nürnberg)
> Geschäftsführer: Ivo Totev


Re: [PATCH 00/10] drm/radeon: Convert fbdev to DRM client

2023-03-20 Thread Thomas Zimmermann

Hi

Am 20.03.23 um 16:11 schrieb Christian König:

Am 17.03.23 um 10:20 schrieb Thomas Zimmermann:

Hi Christian

Am 17.03.23 um 09:53 schrieb Christian König:

Am 16.03.23 um 10:37 schrieb Thomas Zimmermann:

Convert radeon's fbdev code to drm_client. Replaces the current
ad-hoc integration. The conversion includes a number of cleanups.
Only build fbdev support if the config option has been set.


I'm torn apart on that. On the one hand it looks like a really nice 
cleanup on the other hand we don't really want to touch radeon any more.


It's a driver in the upstream kernel. You have to expect at least some 
changes.


Some changes is not the problem, but we need a justification to change 
something. Just that it's nice to have won't do it without extensive 
testing.






Alex what do you think? Is that worth the risk of breaking stuff?


Moving all fbdev emulation to struct drm_client is required for new 
in-kernel DRM clients, such as a DRM kernel logger or a boot splash.


Well that's a rather good justification. I suggest to add that to the 
cover-letter.


Ok, will go into a possible v2. The mid-term plan is to convert the 
fbdev code in all remaining drivers to struct drm_client and remove the 
old ad-hoc callbacks.


With struct drm_client, we can select in-kernel clients at compile time 
or runtime just like userspace clients. I guess, we can have a bootup 
screen and then switch to the console or the DRM logger. Or go from any 
client to the logger on kernel panics (or something like that). There's 
been occasional talk about userspace consoles, which would use such 
functionality.


Best regards
Thomas



Regards,
Christian.



Best regards
Thomas



Christian.



Thomas Zimmermann (10):
   drm/radeon: Move radeon_align_pitch() next to dumb-buffer helpers
   drm/radeon: Improve fbdev object-test helper
   drm/radeon: Remove struct radeon_fbdev
   drm/radeon: Remove test for !screen_base in fbdev probing
   drm/radeon: Move fbdev object helpers before struct fb_ops et al
   drm/radeon: Fix coding style in fbdev emulation
   drm/radeon: Move fbdev cleanup code into fb_destroy callback
   drm/radeon: Correctly clean up failed display probing
   drm/radeon: Implement client-based fbdev emulation
   drm/radeon: Only build fbdev if DRM_FBDEV_EMULATION is set

  drivers/gpu/drm/radeon/Makefile |   3 +-
  drivers/gpu/drm/radeon/radeon.h |   2 +
  drivers/gpu/drm/radeon/radeon_display.c |   4 -
  drivers/gpu/drm/radeon/radeon_drv.c |   3 +-
  drivers/gpu/drm/radeon/radeon_drv.h |   1 -
  drivers/gpu/drm/radeon/radeon_fb.c  | 400 --
  drivers/gpu/drm/radeon/radeon_fbdev.c   | 422 


  drivers/gpu/drm/radeon/radeon_gem.c |  24 ++
  drivers/gpu/drm/radeon/radeon_kms.c |  18 -
  drivers/gpu/drm/radeon/radeon_mode.h    |  20 +-
  10 files changed, 464 insertions(+), 433 deletions(-)
  delete mode 100644 drivers/gpu/drm/radeon/radeon_fb.c
  create mode 100644 drivers/gpu/drm/radeon/radeon_fbdev.c


base-commit: ec0708e846b819c8d5b642de42448a87d7526564








--
Thomas Zimmermann
Graphics Driver Developer
SUSE Software Solutions Germany GmbH
Maxfeldstr. 5, 90409 Nürnberg, Germany
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev


OpenPGP_signature
Description: OpenPGP digital signature


Re: [PATCH 00/10] drm/radeon: Convert fbdev to DRM client

2023-03-20 Thread Christian König

Am 17.03.23 um 10:20 schrieb Thomas Zimmermann:

Hi Christian

Am 17.03.23 um 09:53 schrieb Christian König:

Am 16.03.23 um 10:37 schrieb Thomas Zimmermann:

Convert radeon's fbdev code to drm_client. Replaces the current
ad-hoc integration. The conversion includes a number of cleanups.
Only build fbdev support if the config option has been set.


I'm torn apart on that. On the one hand it looks like a really nice 
cleanup on the other hand we don't really want to touch radeon any more.


It's a driver in the upstream kernel. You have to expect at least some 
changes.


Some changes is not the problem, but we need a justification to change 
something. Just that it's nice to have won't do it without extensive 
testing.






Alex what do you think? Is that worth the risk of breaking stuff?


Moving all fbdev emulation to struct drm_client is required for new 
in-kernel DRM clients, such as a DRM kernel logger or a boot splash.


Well that's a rather good justification. I suggest to add that to the 
cover-letter.


Regards,
Christian.



Best regards
Thomas



Christian.



Thomas Zimmermann (10):
   drm/radeon: Move radeon_align_pitch() next to dumb-buffer helpers
   drm/radeon: Improve fbdev object-test helper
   drm/radeon: Remove struct radeon_fbdev
   drm/radeon: Remove test for !screen_base in fbdev probing
   drm/radeon: Move fbdev object helpers before struct fb_ops et al
   drm/radeon: Fix coding style in fbdev emulation
   drm/radeon: Move fbdev cleanup code into fb_destroy callback
   drm/radeon: Correctly clean up failed display probing
   drm/radeon: Implement client-based fbdev emulation
   drm/radeon: Only build fbdev if DRM_FBDEV_EMULATION is set

  drivers/gpu/drm/radeon/Makefile |   3 +-
  drivers/gpu/drm/radeon/radeon.h |   2 +
  drivers/gpu/drm/radeon/radeon_display.c |   4 -
  drivers/gpu/drm/radeon/radeon_drv.c |   3 +-
  drivers/gpu/drm/radeon/radeon_drv.h |   1 -
  drivers/gpu/drm/radeon/radeon_fb.c  | 400 --
  drivers/gpu/drm/radeon/radeon_fbdev.c   | 422 


  drivers/gpu/drm/radeon/radeon_gem.c |  24 ++
  drivers/gpu/drm/radeon/radeon_kms.c |  18 -
  drivers/gpu/drm/radeon/radeon_mode.h    |  20 +-
  10 files changed, 464 insertions(+), 433 deletions(-)
  delete mode 100644 drivers/gpu/drm/radeon/radeon_fb.c
  create mode 100644 drivers/gpu/drm/radeon/radeon_fbdev.c


base-commit: ec0708e846b819c8d5b642de42448a87d7526564








[PATCH v2] drm/amdgpu: add print for iommu translation mode

2023-03-20 Thread Graham Sider
Add log to display whether RAM is direct vs DMA mapped.

Signed-off-by: Graham Sider 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 3bd6c5aef796..83774824694b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3528,8 +3528,12 @@ static void amdgpu_device_check_iommu_direct_map(struct 
amdgpu_device *adev)
struct iommu_domain *domain;
 
domain = iommu_get_domain_for_dev(adev->dev);
-   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
+   if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
+   dev_info(adev->dev, "RAM is direct mapped to GPU (not 
translated by IOMMU)\n");
adev->ram_is_direct_mapped = true;
+   } else {
+   dev_info(adev->dev, "RAM is DMA mapped to GPU (translated by 
IOMMU)\n");
+   }
 }
 
 static const struct attribute *amdgpu_dev_attributes[] = {
-- 
2.25.1



Re: [PATCH 28/37] drm/amd/display/dc/core/dc_stat: Convert a couple of doc headers to kerneldoc format

2023-03-20 Thread Hamza Mahfooz

On 3/17/23 04:17, Lee Jones wrote:

Fixes the following W=1 kernel build warning(s):

  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:38: warning: Cannot 
understand  
*
  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:76: warning: Cannot 
understand  
*

Cc: Harry Wentland 
Cc: Leo Li 
Cc: Rodrigo Siqueira 
Cc: Alex Deucher 
Cc: "Christian König" 
Cc: "Pan, Xinhui" 
Cc: David Airlie 
Cc: Daniel Vetter 
Cc: Mustapha Ghaddar 
Cc: Nicholas Kazlauskas 
Cc: Jasdeep Dhillon 
Cc: amd-gfx@lists.freedesktop.org
Cc: dri-de...@lists.freedesktop.org
Signed-off-by: Lee Jones 


Applied, thanks!


---
  drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 28 +++
  1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c 
b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
index 6c06587dd88c2..5f6392ae31a66 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
@@ -35,19 +35,15 @@
   */
  
  /**

- *
- *  Function: dc_stat_get_dmub_notification
+ *  dc_stat_get_dmub_notification
   *
- *  @brief
- * Calls dmub layer to retrieve dmub notification
+ * Calls dmub layer to retrieve dmub notification
   *
- *  @param
- * [in] dc: dc structure
- * [in] notify: dmub notification structure
+ * @dc: dc structure
+ * @notify: dmub notification structure
   *
- *  @return
+ * Returns
   * None
- *
   */
  void dc_stat_get_dmub_notification(const struct dc *dc, struct 
dmub_notification *notify)
  {
@@ -73,19 +69,15 @@ void dc_stat_get_dmub_notification(const struct dc *dc, 
struct dmub_notification
  }
  
  /**

- *
- *  Function: dc_stat_get_dmub_dataout
+ * dc_stat_get_dmub_dataout
   *
- *  @brief
- * Calls dmub layer to retrieve dmub gpint dataout
+ * Calls dmub layer to retrieve dmub gpint dataout
   *
- *  @param
- * [in] dc: dc structure
- * [in] dataout: dmub gpint dataout
+ * @dc: dc structure
+ * @dataout: dmub gpint dataout
   *
- *  @return
+ * Returns
   * None
- *
   */
  void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout)
  {


--
Hamza



RE: [PATCH] drm/amdgpu: add print for iommu translation mode

2023-03-20 Thread Sider, Graham
[Public]

> -Original Message-
> From: Kuehling, Felix 
> Sent: Friday, March 17, 2023 5:16 PM
> To: Sider, Graham ; Russell, Kent
> ; Mahfooz, Hamza ;
> amd-gfx@lists.freedesktop.org
> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation mode
> 
> On 2023-03-17 16:04, Sider, Graham wrote:
> > [AMD Official Use Only - General]
> >
> >
> >
> >> -Original Message-
> >> From: Russell, Kent 
> >> Sent: Friday, March 17, 2023 3:58 PM
> >> To: Mahfooz, Hamza ; Sider, Graham
> >> ; amd-gfx@lists.freedesktop.org
> >> Cc: Kuehling, Felix 
> >> Subject: RE: [PATCH] drm/amdgpu: add print for iommu translation mode
> >>
> >> [AMD Official Use Only - General]
> >>
> >>
> >>
> >>> -Original Message-
> >>> From: amd-gfx  On Behalf Of
> >>> Hamza Mahfooz
> >>> Sent: Friday, March 17, 2023 3:58 PM
> >>> To: Sider, Graham ;
> >>> amd-gfx@lists.freedesktop.org
> >>> Cc: Kuehling, Felix 
> >>> Subject: Re: [PATCH] drm/amdgpu: add print for iommu translation
> >>> mode
> >>>
> >>>
> >>> On 3/17/23 15:47, Graham Sider wrote:
>  Add log to display whether RAM is direct vs DMA mapped.
> 
>  Signed-off-by: Graham Sider 
> >>> If this information is only useful for debugging purposes, please
> >>> use
> >>> drm_dbg() instead of pr_info().
> > It's useful for more than just debug I would say. Just a quick way to grep
> whether IOMMU is off/pt vs device isolation mode.
> 
> I agree. The kernel log otherwise tells you the default IOMMU domain, but it
> may not match the domain actually used for the GPU. Without this message
> there is no easy way to tell from a kernel log. This will help with triaging 
> issues
> from logs provided by external and internal users.
> 
> 
> >
> > Graham
> >
>  ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 +-
> 1 file changed, 5 insertions(+), 1 deletion(-)
> 
>  diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  index 8bba5e6872a1..8797a9523244 100644
>  --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>  @@ -3528,8 +3528,12 @@ static void
> >>> amdgpu_device_check_iommu_direct_map(struct amdgpu_device
> *adev)
>   struct iommu_domain *domain;
> 
>   domain = iommu_get_domain_for_dev(adev->dev);
>  -if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY)
>  +if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) {
>  +pr_info("RAM is direct mapped to GPU (not traslated by
> 
> Use dev_info. That way you can tell which GPU the message applies to in a
> multi-GPU system.
> 

Good point - will do that. Thanks!

Graham

> Regards,
>    Felix
> 
> 
> >> traslated -> translated
> >>
> > Thanks, my keyboard keeps skipping on the 'n' key lately :( time for a
> clean.
> >
> > Graham
> >
> >>   Kent
> >>> IOMMU)\n");
>   adev->ram_is_direct_mapped = true;
>  +} else {
>  +pr_info("RAM is DMA mapped to GPU (translated by
> >>> IOMMU)\n");
>  +}
> }
> 
> static const struct attribute *amdgpu_dev_attributes[] = {
> >>> --
> >>> Hamza


Re: [PATCH 28/37] drm/amd/display/dc/core/dc_stat: Convert a couple of doc headers to kerneldoc format

2023-03-20 Thread Lee Jones
On Mon, 20 Mar 2023, Harry Wentland wrote:

>
>
> On 3/20/23 04:19, Lee Jones wrote:
> > On Fri, 17 Mar 2023, Alex Deucher wrote:
> >
> >> On Fri, Mar 17, 2023 at 4:23 AM Lee Jones  wrote:
> >>>
> >>> Fixes the following W=1 kernel build warning(s):
> >>>
> >>>  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:38: warning: 
> >>> Cannot understand  
> >>> *
> >>>  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:76: warning: 
> >>> Cannot understand  
> >>> *
> >>>
> >>> Cc: Harry Wentland 
> >>> Cc: Leo Li 
> >>> Cc: Rodrigo Siqueira 
> >>> Cc: Alex Deucher 
> >>> Cc: "Christian König" 
> >>> Cc: "Pan, Xinhui" 
> >>> Cc: David Airlie 
> >>> Cc: Daniel Vetter 
> >>> Cc: Mustapha Ghaddar 
> >>> Cc: Nicholas Kazlauskas 
> >>> Cc: Jasdeep Dhillon 
> >>> Cc: amd-gfx@lists.freedesktop.org
> >>> Cc: dri-de...@lists.freedesktop.org
> >>> Signed-off-by: Lee Jones 
> >>> ---
> >>>  drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 28 +++
> >>>  1 file changed, 10 insertions(+), 18 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c 
> >>> b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> >>> index 6c06587dd88c2..5f6392ae31a66 100644
> >>> --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> >>> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> >>> @@ -35,19 +35,15 @@
> >>>   */
> >>>
> >>>  /**
> >>
> >> This looks like it follows some other documentation scheme.  Would
> >> probably be better to just remove the extra * and make it not kernel
> >> doc.  @Wentland, Harry @Siqueira, Rodrigo ?
> >
> > Happy to wait for further input.
> >
> > Either demoting from or converting to kerneldoc would be fine.
> >
>
> There's no reason they are formatted the way they are. Converting them to
> kerneldoc is fine.
>
> Reviewed-by: Harry Wentland 

Thanks for confirming Harry.

--
Lee Jones [李琼斯]


RE: [PATCH 19/19] drm/amdgpu/smu11: enable TEMP_DEPENDENT_VMIN for navi1x

2023-03-20 Thread Zhuo, Qingqing (Lillian)
[AMD Official Use Only - General]

Thanks Evan! Will do.

Thanks,
Lillian

-Original Message-
From: Quan, Evan 
Sent: Monday, March 20, 2023 4:26 AM
To: Zhuo, Qingqing (Lillian) ; 
amd-gfx@lists.freedesktop.org
Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo) 
; Wentland, Harry ; Zhuo, Qingqing 
(Lillian) ; Siqueira, Rodrigo 
; Li, Roman ; Chiu, Solomon 
; Pillai, Aurabindo ; Lin, 
Wayne ; Deucher, Alexander ; 
Lakha, Bhawanpreet ; Gutierrez, Agustin 
; Kotarac, Pavle 
Subject: RE: [PATCH 19/19] drm/amdgpu/smu11: enable TEMP_DEPENDENT_VMIN for 
navi1x

[AMD Official Use Only - General]

Better to update the subject with prefix as "drm/amd/pm" to align with other 
power changes.
Either way the patch is
Reviewed-by: Evan Quan 

BR
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Qingqing Zhuo
> Sent: Saturday, March 18, 2023 3:56 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo)
> ; Wentland, Harry ; Zhuo,
> Qingqing (Lillian) ; Siqueira, Rodrigo
> ; Li, Roman ; Chiu,
> Solomon ; Pillai, Aurabindo
> ; Lin, Wayne ; Deucher,
> Alexander ; Lakha, Bhawanpreet
> ; Gutierrez, Agustin
> ; Kotarac, Pavle 
> Subject: [PATCH 19/19] drm/amdgpu/smu11: enable TEMP_DEPENDENT_VMIN
> for navi1x
>
> From: Alex Deucher 
>
> May help stability with some navi1x boards.
>
> Hopefully this helps with stability with multiple monitors and would
> allow us to re-enable MPC_SPLIT_DYNAMIC in the DC code for better power 
> savings.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2196
>
> Reviewed-by: Rodrigo Siqueira 
> Acked-by: Qingqing Zhuo 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 95da6dd1cc65..c4000518dc56 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -304,7 +304,8 @@ navi10_get_allowed_feature_mask(struct
> smu_context *smu,
>   | FEATURE_MASK(FEATURE_GFX_SS_BIT)
>   | FEATURE_MASK(FEATURE_APCC_DFLL_BIT)
>   | FEATURE_MASK(FEATURE_FW_CTF_BIT)
> - |
> FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
> + |
> FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT)
> + |
> FEATURE_MASK(FEATURE_TEMP_DEPENDENT_VMIN_BIT);
>
>   if (adev->pm.pp_feature & PP_SCLK_DPM_MASK)
>   *(uint64_t *)feature_mask |=
> FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT);
> --
> 2.34.1


Re: [PATCH 28/37] drm/amd/display/dc/core/dc_stat: Convert a couple of doc headers to kerneldoc format

2023-03-20 Thread Harry Wentland



On 3/20/23 04:19, Lee Jones wrote:
> On Fri, 17 Mar 2023, Alex Deucher wrote:
> 
>> On Fri, Mar 17, 2023 at 4:23 AM Lee Jones  wrote:
>>>
>>> Fixes the following W=1 kernel build warning(s):
>>>
>>>  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:38: warning: 
>>> Cannot understand  
>>> *
>>>  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:76: warning: 
>>> Cannot understand  
>>> *
>>>
>>> Cc: Harry Wentland 
>>> Cc: Leo Li 
>>> Cc: Rodrigo Siqueira 
>>> Cc: Alex Deucher 
>>> Cc: "Christian König" 
>>> Cc: "Pan, Xinhui" 
>>> Cc: David Airlie 
>>> Cc: Daniel Vetter 
>>> Cc: Mustapha Ghaddar 
>>> Cc: Nicholas Kazlauskas 
>>> Cc: Jasdeep Dhillon 
>>> Cc: amd-gfx@lists.freedesktop.org
>>> Cc: dri-de...@lists.freedesktop.org
>>> Signed-off-by: Lee Jones 
>>> ---
>>>  drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 28 +++
>>>  1 file changed, 10 insertions(+), 18 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c 
>>> b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
>>> index 6c06587dd88c2..5f6392ae31a66 100644
>>> --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
>>> +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
>>> @@ -35,19 +35,15 @@
>>>   */
>>>
>>>  /**
>>
>> This looks like it follows some other documentation scheme.  Would
>> probably be better to just remove the extra * and make it not kernel
>> doc.  @Wentland, Harry @Siqueira, Rodrigo ?
> 
> Happy to wait for further input.
> 
> Either demoting from or converting to kerneldoc would be fine.
> 

There's no reason they are formatted the way they are. Converting them to
kerneldoc is fine.

Reviewed-by: Harry Wentland 

Harry

>>> - 
>>> *
>>> - *  Function: dc_stat_get_dmub_notification
>>> + *  dc_stat_get_dmub_notification
>>>   *
>>> - *  @brief
>>> - * Calls dmub layer to retrieve dmub notification
>>> + * Calls dmub layer to retrieve dmub notification
>>>   *
>>> - *  @param
>>> - * [in] dc: dc structure
>>> - * [in] notify: dmub notification structure
>>> + * @dc: dc structure
>>> + * @notify: dmub notification structure
>>>   *
>>> - *  @return
>>> + * Returns
>>>   * None
>>> - 
>>> *
>>>   */
>>>  void dc_stat_get_dmub_notification(const struct dc *dc, struct 
>>> dmub_notification *notify)
>>>  {
>>> @@ -73,19 +69,15 @@ void dc_stat_get_dmub_notification(const struct dc *dc, 
>>> struct dmub_notification
>>>  }
>>>
>>>  /**
>>> - 
>>> *
>>> - *  Function: dc_stat_get_dmub_dataout
>>> + * dc_stat_get_dmub_dataout
>>>   *
>>> - *  @brief
>>> - * Calls dmub layer to retrieve dmub gpint dataout
>>> + * Calls dmub layer to retrieve dmub gpint dataout
>>>   *
>>> - *  @param
>>> - * [in] dc: dc structure
>>> - * [in] dataout: dmub gpint dataout
>>> + * @dc: dc structure
>>> + * @dataout: dmub gpint dataout
>>>   *
>>> - *  @return
>>> + * Returns
>>>   * None
>>> - 
>>> *
>>>   */
>>>  void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout)
>>>  {
>>> --
>>> 2.40.0.rc1.284.g88254d51c5-goog
>>>
> 
> --
> Lee Jones [李琼斯]



[RFC] VRM setting in a mobile system

2023-03-20 Thread Tom Psyborg
Hello

How can I verify the driver is engaging both phases in a mobile RX460 system?
I see there are PSI0_VID, PSI0_EN and PSI1 bits defined for use,
according to VRM specs they are active low and the VBIOS voltageobject
table is not setting any of these bits. Does that mean my system is
running 1-phase DE mode? Or there is some logic in driver that
switches to 2-phase CCM?

Regards, Tom


Reminder: Last day (!) to nominate candidates for the 2023 X.Org Board of Directors Elections

2023-03-20 Thread Ricardo Garcia
Final reminder that the nomination period for the X.Org Board of
Director elections finishes today, March 19th.

Please send your nominations or self-nominations as soon as possible
following the instructions below.

Thanks again for your attention.

On Mon, 2023-03-13 at 16:27 +0100, Ricardo Garcia wrote:
> This is a reminder that the nomination period for the X.Org Board of
> Director elections finishes in a week, on March 19th.
> 
> If you would like to nominate yourself please send email to the election
> committee electi...@x.org, giving your
> 
> name
> current professional affiliation
> a statement of contribution to X.Org or related technologies
> a personal statement.
> 
> To vote or to be elected to the Board you needed to be a Member of the
> X.Org Foundation. To be a Member of the X.Org Foundation you need to
> apply or renew your membership until the end of the nomination period.
> 
> Original email follows below. Thanks for your attention.
> 
> On Wed, 2023-02-15 at 21:53 +0100, Ricardo Garcia wrote:
> > We are seeking nominations for candidates for election to the X.Org
> > Foundation Board of Directors. All X.Org Foundation members are eligible
> > for election to the board.
> > 
> > Nominations for the 2023 election are now open and will remain open
> > until 23:59 UTC on 19 March 2023.
> > 
> > The Board consists of directors elected from the membership. Each year,
> > an election is held to bring the total number of directors to eight. The
> > four members receiving the highest vote totals will serve as directors
> > for two year terms.
> > 
> > The directors who received two year terms starting in 2022 were Emma
> > Anholt, Mark Filion, Alyssa Rosenzweig and Ricardo Garcia. They will
> > continue to serve until their term ends in 2024. Current directors whose
> > term expires in 2023 are Samuel Iglesias Gonsálvez, Manasi D Navare,
> > Lyude Paul and Daniel Vetter.
> > 
> > A director is expected to participate in the fortnightly IRC meeting to
> > discuss current business and to attend the annual meeting of the X.Org
> > Foundation, which will be held at a location determined in advance by
> > the Board of Directors.
> > 
> > A member may nominate themselves or any other member they feel is
> > qualified. Nominations should be sent to the Election Committee at
> > elections at x.org.
> > 
> > Nominees shall be required to be current members of the X.Org
> > Foundation, and submit a personal statement of up to 200 words that will
> > be provided to prospective voters. The collected statements, along with
> > the statement of contribution to the X.Org Foundation in the member's
> > account page on http://members.x.org, will be made available to all
> > voters to help them make their voting decisions.
> > 
> > Nominations, membership applications or renewals and completed personal
> > statements must be received no later than 23:59 UTC on 19 March 2023.
> > 
> > The slate of candidates will be published 26 March 2023 and candidate
> > Q&A will begin then. The deadline for Xorg membership applications and
> > renewals is 26 March 2023.
> > 
> > Cheers,
> > Ricardo Garcia, on behalf of the X.Org BoD
> > 
> 




RE: [PATCH] drm/amdgpu: Initialize umc ras callback

2023-03-20 Thread Yang, Stanley
[AMD Official Use Only - General]

Reviewed-by: Stanley Yang 

Regards,
Stanley
> -Original Message-
> From: amd-gfx  On Behalf Of
> Hawking Zhang
> Sent: Monday, March 20, 2023 5:37 PM
> To: amd-gfx@lists.freedesktop.org; Zhou1, Tao 
> Cc: Zhang, Hawking 
> Subject: [PATCH] drm/amdgpu: Initialize umc ras callback
> 
> Fix a coding error which results in a null interrupt handler for umc ras.
> 
> Signed-off-by: Hawking Zhang 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> index da68ceaa024c..9e2e97207e53 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
> @@ -232,7 +232,7 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device
> *adev)
>   if (!ras->ras_block.ras_late_init)
>   ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
> 
> - if (ras->ras_block.ras_cb)
> + if (!ras->ras_block.ras_cb)
>   ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
> 
>   return 0;
> --
> 2.17.1


[PATCH] drm/amdgpu: Initialize umc ras callback

2023-03-20 Thread Hawking Zhang
Fix a coding error which results in a null interrupt
handler for umc ras.

Signed-off-by: Hawking Zhang 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index da68ceaa024c..9e2e97207e53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -232,7 +232,7 @@ int amdgpu_umc_ras_sw_init(struct amdgpu_device *adev)
if (!ras->ras_block.ras_late_init)
ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
 
-   if (ras->ras_block.ras_cb)
+   if (!ras->ras_block.ras_cb)
ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
 
return 0;
-- 
2.17.1



RE: [PATCH 19/19] drm/amdgpu/smu11: enable TEMP_DEPENDENT_VMIN for navi1x

2023-03-20 Thread Quan, Evan
[AMD Official Use Only - General]

Better to update the subject with prefix as "drm/amd/pm" to align with other 
power changes.
Either way the patch is
Reviewed-by: Evan Quan 

BR
Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Qingqing Zhuo
> Sent: Saturday, March 18, 2023 3:56 PM
> To: amd-gfx@lists.freedesktop.org
> Cc: Wang, Chao-kai (Stylon) ; Li, Sun peng (Leo)
> ; Wentland, Harry ;
> Zhuo, Qingqing (Lillian) ; Siqueira, Rodrigo
> ; Li, Roman ; Chiu,
> Solomon ; Pillai, Aurabindo
> ; Lin, Wayne ; Deucher,
> Alexander ; Lakha, Bhawanpreet
> ; Gutierrez, Agustin
> ; Kotarac, Pavle 
> Subject: [PATCH 19/19] drm/amdgpu/smu11: enable
> TEMP_DEPENDENT_VMIN for navi1x
> 
> From: Alex Deucher 
> 
> May help stability with some navi1x boards.
> 
> Hopefully this helps with stability with multiple monitors and would allow us
> to re-enable MPC_SPLIT_DYNAMIC in the DC code for better power savings.
> 
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2196
> 
> Reviewed-by: Rodrigo Siqueira 
> Acked-by: Qingqing Zhuo 
> Signed-off-by: Alex Deucher 
> ---
>  drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> index 95da6dd1cc65..c4000518dc56 100644
> --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
> @@ -304,7 +304,8 @@ navi10_get_allowed_feature_mask(struct
> smu_context *smu,
>   | FEATURE_MASK(FEATURE_GFX_SS_BIT)
>   | FEATURE_MASK(FEATURE_APCC_DFLL_BIT)
>   | FEATURE_MASK(FEATURE_FW_CTF_BIT)
> - |
> FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT);
> + |
> FEATURE_MASK(FEATURE_OUT_OF_BAND_MONITOR_BIT)
> + |
> FEATURE_MASK(FEATURE_TEMP_DEPENDENT_VMIN_BIT);
> 
>   if (adev->pm.pp_feature & PP_SCLK_DPM_MASK)
>   *(uint64_t *)feature_mask |=
> FEATURE_MASK(FEATURE_DPM_GFXCLK_BIT);
> --
> 2.34.1


Re: [PATCH 36/37] drm/amd/display/dc/link/link_detection: Demote a couple of kerneldoc abuses

2023-03-20 Thread Lee Jones
On Fri, 17 Mar 2023, Alex Deucher wrote:

> Applied.  Thanks!
>
> Alex

Awesome as ever Alex.  Thank you.

> On Fri, Mar 17, 2023 at 4:24 AM Lee Jones  wrote:
> >
> > Fixes the following W=1 kernel build warning(s):
> >
> >  drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:877: 
> > warning: Function parameter or member 'link' not described in 
> > 'detect_link_and_local_sink'
> >  drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:877: 
> > warning: Function parameter or member 'reason' not described in 
> > 'detect_link_and_local_sink'
> >  drivers/gpu/drm/amd/amdgpu/../display/dc/link/link_detection.c:1232: 
> > warning: Function parameter or member 'link' not described in 
> > 'dc_link_detect_connection_type'
> >
> > Cc: Harry Wentland 
> > Cc: Leo Li 
> > Cc: Rodrigo Siqueira 
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: "Pan, Xinhui" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Lee Jones 
> > Cc: Wenjing Liu 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Signed-off-by: Lee Jones 
> > ---
> >  drivers/gpu/drm/amd/display/dc/link/link_detection.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c 
> > b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> > index 9a4cfa777622e..67addedd89563 100644
> > --- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> > +++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
> > @@ -832,7 +832,7 @@ static void verify_link_capability(struct dc_link 
> > *link, struct dc_sink *sink,
> > verify_link_capability_non_destructive(link);
> >  }
> >
> > -/**
> > +/*
> >   * detect_link_and_local_sink() - Detect if a sink is attached to a given 
> > link
> >   *
> >   * link->local_sink is created or destroyed as needed.
> > @@ -1185,7 +1185,7 @@ static bool detect_link_and_local_sink(struct dc_link 
> > *link,
> > return true;
> >  }
> >
> > -/**
> > +/*
> >   * link_detect_connection_type() - Determine if there is a sink connected
> >   *
> >   * @type: Returned connection type
> > --
> > 2.40.0.rc1.284.g88254d51c5-goog
> >

--
Lee Jones [李琼斯]


Re: [PATCH 28/37] drm/amd/display/dc/core/dc_stat: Convert a couple of doc headers to kerneldoc format

2023-03-20 Thread Lee Jones
On Fri, 17 Mar 2023, Alex Deucher wrote:

> On Fri, Mar 17, 2023 at 4:23 AM Lee Jones  wrote:
> >
> > Fixes the following W=1 kernel build warning(s):
> >
> >  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:38: warning: 
> > Cannot understand  
> > *
> >  drivers/gpu/drm/amd/amdgpu/../display/dc/core/dc_stat.c:76: warning: 
> > Cannot understand  
> > *
> >
> > Cc: Harry Wentland 
> > Cc: Leo Li 
> > Cc: Rodrigo Siqueira 
> > Cc: Alex Deucher 
> > Cc: "Christian König" 
> > Cc: "Pan, Xinhui" 
> > Cc: David Airlie 
> > Cc: Daniel Vetter 
> > Cc: Mustapha Ghaddar 
> > Cc: Nicholas Kazlauskas 
> > Cc: Jasdeep Dhillon 
> > Cc: amd-gfx@lists.freedesktop.org
> > Cc: dri-de...@lists.freedesktop.org
> > Signed-off-by: Lee Jones 
> > ---
> >  drivers/gpu/drm/amd/display/dc/core/dc_stat.c | 28 +++
> >  1 file changed, 10 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c 
> > b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> > index 6c06587dd88c2..5f6392ae31a66 100644
> > --- a/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> > +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stat.c
> > @@ -35,19 +35,15 @@
> >   */
> >
> >  /**
>
> This looks like it follows some other documentation scheme.  Would
> probably be better to just remove the extra * and make it not kernel
> doc.  @Wentland, Harry @Siqueira, Rodrigo ?

Happy to wait for further input.

Either demoting from or converting to kerneldoc would be fine.

> > - 
> > *
> > - *  Function: dc_stat_get_dmub_notification
> > + *  dc_stat_get_dmub_notification
> >   *
> > - *  @brief
> > - * Calls dmub layer to retrieve dmub notification
> > + * Calls dmub layer to retrieve dmub notification
> >   *
> > - *  @param
> > - * [in] dc: dc structure
> > - * [in] notify: dmub notification structure
> > + * @dc: dc structure
> > + * @notify: dmub notification structure
> >   *
> > - *  @return
> > + * Returns
> >   * None
> > - 
> > *
> >   */
> >  void dc_stat_get_dmub_notification(const struct dc *dc, struct 
> > dmub_notification *notify)
> >  {
> > @@ -73,19 +69,15 @@ void dc_stat_get_dmub_notification(const struct dc *dc, 
> > struct dmub_notification
> >  }
> >
> >  /**
> > - 
> > *
> > - *  Function: dc_stat_get_dmub_dataout
> > + * dc_stat_get_dmub_dataout
> >   *
> > - *  @brief
> > - * Calls dmub layer to retrieve dmub gpint dataout
> > + * Calls dmub layer to retrieve dmub gpint dataout
> >   *
> > - *  @param
> > - * [in] dc: dc structure
> > - * [in] dataout: dmub gpint dataout
> > + * @dc: dc structure
> > + * @dataout: dmub gpint dataout
> >   *
> > - *  @return
> > + * Returns
> >   * None
> > - 
> > *
> >   */
> >  void dc_stat_get_dmub_dataout(const struct dc *dc, uint32_t *dataout)
> >  {
> > --
> > 2.40.0.rc1.284.g88254d51c5-goog
> >

--
Lee Jones [李琼斯]


RE: [PATCH] drm/amdgpu: improve debug VRAM access performance using sdma

2023-03-20 Thread Quan, Evan
[AMD Official Use Only - General]

I happened to find that the sdma_access_bo allocation from GTT seems to be
performed before gart is ready.
That makes the "amdgpu_gart_map" is skipped since adev->gart.ptr is still NULL.
Is that done intentionally ?

Evan
> -Original Message-
> From: amd-gfx  On Behalf Of
> Jonathan Kim
> Sent: Wednesday, January 5, 2022 3:12 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Kuehling, Felix ; Kim, Jonathan
> ; Koenig, Christian 
> Subject: [PATCH] drm/amdgpu: improve debug VRAM access performance
> using sdma
> 
> For better performance during VRAM access for debugged processes, do
> read/write copies over SDMA.
> 
> In order to fulfill post mortem debugging on a broken device, fallback to
> stable MMIO access when gpu recovery is disabled or when job submission
> time outs are set to max.  Failed SDMA access should automatically fall
> back to MMIO access.
> 
> Use a pre-allocated GTT bounce buffer pre-mapped into GART to avoid
> page-table updates and TLB flushes on access.
> 
> Signed-off-by: Jonathan Kim 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 78
> +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  5 +-
>  2 files changed, 82 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 367abed1d6e6..512df4c09772 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -48,6 +48,7 @@
>  #include 
> 
>  #include 
> +#include 
> 
>  #include "amdgpu.h"
>  #include "amdgpu_object.h"
> @@ -1429,6 +1430,70 @@ static void amdgpu_ttm_vram_mm_access(struct
> amdgpu_device *adev, loff_t pos,
>   }
>  }
> 
> +static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object
> *bo,
> + unsigned long offset, void *buf, int
> len, int write)
> +{
> + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
> + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
> + struct amdgpu_job *job;
> + struct dma_fence *fence;
> + uint64_t src_addr, dst_addr;
> + unsigned int num_dw;
> + int r, idx;
> +
> + if (len != PAGE_SIZE)
> + return -EINVAL;
> +
> + if (!adev->mman.sdma_access_ptr)
> + return -EACCES;
> +
> + r = drm_dev_enter(adev_to_drm(adev), &idx);
> + if (r)
> + return r;
> +
> + if (write)
> + memcpy(adev->mman.sdma_access_ptr, buf, len);
> +
> + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
> + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4,
> AMDGPU_IB_POOL_DELAYED, &job);
> + if (r)
> + goto out;
> +
> + src_addr = write ? amdgpu_bo_gpu_offset(adev-
> >mman.sdma_access_bo) :
> + amdgpu_bo_gpu_offset(abo);
> + dst_addr = write ? amdgpu_bo_gpu_offset(abo) :
> + amdgpu_bo_gpu_offset(adev-
> >mman.sdma_access_bo);
> + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
> PAGE_SIZE, false);
> +
> + amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
> + WARN_ON(job->ibs[0].length_dw > num_dw);
> +
> + r = amdgpu_job_submit(job, &adev->mman.entity,
> AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
> + if (r) {
> + amdgpu_job_free(job);
> + goto out;
> + }
> +
> + if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
> + r = -ETIMEDOUT;
> + dma_fence_put(fence);
> +
> + if (!(r || write))
> + memcpy(buf, adev->mman.sdma_access_ptr, len);
> +out:
> + drm_dev_exit(idx);
> + return r;
> +}
> +
> +static inline bool amdgpu_ttm_allow_post_mortem_debug(struct
> amdgpu_device *adev)
> +{
> + return amdgpu_gpu_recovery == 0 ||
> + adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
> + adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
> + adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
> + adev->video_timeout == MAX_SCHEDULE_TIMEOUT;
> +}
> +
>  /**
>   * amdgpu_ttm_access_memory - Read or Write memory that backs a
> buffer object.
>   *
> @@ -1453,6 +1518,10 @@ static int amdgpu_ttm_access_memory(struct
> ttm_buffer_object *bo,
>   if (bo->resource->mem_type != TTM_PL_VRAM)
>   return -EIO;
> 
> + if (!amdgpu_ttm_allow_post_mortem_debug(adev) &&
> + !amdgpu_ttm_access_memory_sdma(bo, offset, buf,
> len, write))
> + return len;
> +
>   amdgpu_res_first(bo->resource, offset, len, &cursor);
>   while (cursor.remaining) {
>   size_t count, size = cursor.size;
> @@ -1793,6 +1862,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
>   return r;
>   }
> 
> + if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
> + AMDGPU_GEM_DOMAIN_GTT,
> + &adev->mman.sdma_access_bo, NULL,
> + adev->mman.sdma_acces