On Thu, Feb 5, 2026 at 2:02 AM Perry Yuan <[email protected]> wrote:
>
> Introduce a Peak Tops Limiter (PTL) driver that dynamically caps
> engine frequency to ensure delivered TOPS never exceeds a defined
> TOPS_limit. This initial implementation provides core data structures
> and kernel-space interfaces (set/get, enable/disable) to manage PTL state.
>
> PTL performs a firmware handshake to initialize its state and update
> predefined format types. It supports updating these format types at
> runtime while user-space tools automatically switch PTL state, and
> also allows explicitly switching PTL state via newly added commands.
>
> Signed-off-by: Perry Yuan <[email protected]>
> Reviewed-by: Lijo Lazar <[email protected]>
> Acked-by: Alex Deucher <[email protected]>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 80 +++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 6 ++
> include/uapi/linux/kfd_ioctl.h | 9 +++
> 3 files changed, 95 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index b0540b009e84..2fbc3f95fedd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -679,6 +679,8 @@ static const char *psp_gfx_cmd_name(enum psp_gfx_cmd_id cmd_id)
> return "SPATIAL_PARTITION";
> case GFX_CMD_ID_FB_NPS_MODE:
> return "NPS_MODE_CHANGE";
> + case GFX_CMD_ID_PERF_HW:
> + return "PERF MONITORING HW";
> default:
> return "UNKNOWN CMD";
> }
> @@ -1197,6 +1199,84 @@ int psp_memory_partition(struct psp_context *psp, int mode)
> return ret;
> }
>
> +static int psp_ptl_fmt_verify(struct psp_context *psp, enum amdgpu_ptl_fmt
> fmt,
> + uint32_t *ptl_fmt)
> +{
> + struct amdgpu_device *adev = psp->adev;
> +
> + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4))
> + return -EINVAL;
> +
> + switch (fmt) {
> + case AMDGPU_PTL_FMT_I8:
> + *ptl_fmt = GFX_FTYPE_I8;
> + break;
> + case AMDGPU_PTL_FMT_F16:
> + *ptl_fmt = GFX_FTYPE_F16;
> + break;
> + case AMDGPU_PTL_FMT_BF16:
> + *ptl_fmt = GFX_FTYPE_BF16;
> + break;
> + case AMDGPU_PTL_FMT_F32:
> + *ptl_fmt = GFX_FTYPE_F32;
> + break;
> + case AMDGPU_PTL_FMT_F64:
> + *ptl_fmt = GFX_FTYPE_F64;
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code,
> + uint32_t *ptl_state, uint32_t *fmt1, uint32_t
> *fmt2)
> +{
> + struct psp_gfx_cmd_resp *cmd;
> + uint32_t ptl_fmt1, ptl_fmt2;
> + int ret;
> +
> + if (!psp || !ptl_state || !fmt1 || !fmt2)
> + return -EINVAL;
> +
> + if (amdgpu_sriov_vf(psp->adev))
> + return 0;
> +
> + if (psp_ptl_fmt_verify(psp, *fmt1, &ptl_fmt1) ||
> + psp_ptl_fmt_verify(psp, *fmt2, &ptl_fmt2))
> + return -EINVAL;
Do we need a firmware (FW) version check before submitting this command to the PSP?
Alex
> +
> + cmd = acquire_psp_cmd_buf(psp);
> +
> + cmd->cmd_id = GFX_CMD_ID_PERF_HW;
> + cmd->cmd.cmd_req_perf_hw.req = req_code;
> + cmd->cmd.cmd_req_perf_hw.ptl_state = *ptl_state;
> + cmd->cmd.cmd_req_perf_hw.pref_format1 = ptl_fmt1;
> + cmd->cmd.cmd_req_perf_hw.pref_format2 = ptl_fmt2;
> +
> + ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
> + if (ret)
> + goto out;
> +
> + switch (req_code) {
> + case PSP_PTL_PERF_MON_QUERY:
> + *ptl_state = cmd->resp.uresp.perf_hw_info.ptl_state;
> + *fmt1 = cmd->resp.uresp.perf_hw_info.pref_format1;
> + *fmt2 = cmd->resp.uresp.perf_hw_info.pref_format2;
> + break;
> + case PSP_PTL_PERF_MON_SET:
> + psp->ptl_enabled = *ptl_state;
> + psp->ptl_fmt1 = ptl_fmt1;
> + psp->ptl_fmt2 = ptl_fmt2;
> + break;
> + }
> +
> +out:
> + release_psp_cmd_buf(psp);
> + return ret;
> +}
> +
> int psp_spatial_partition(struct psp_context *psp, int mode)
> {
> struct psp_gfx_cmd_resp *cmd;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 90df8e29f532..47c8becbf710 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -471,6 +471,10 @@ struct psp_context {
> #if defined(CONFIG_DEBUG_FS)
> struct spirom_bo *spirom_dump_trip;
> #endif
> + enum amdgpu_ptl_fmt ptl_fmt1;
> + enum amdgpu_ptl_fmt ptl_fmt2;
> + bool ptl_enabled;
> + bool ptl_hw_supported;
> };
>
> struct amdgpu_psp_funcs {
> @@ -654,5 +658,7 @@ void amdgpu_psp_debugfs_init(struct amdgpu_device *adev);
> int amdgpu_psp_get_fw_type(struct amdgpu_firmware_info *ucode,
> enum psp_gfx_fw_type *type);
>
> +int psp_performance_monitor_hw(struct psp_context *psp, u32 req_code,
> + u32 *ptl_state, u32 *fmt1, u32 *fmt2);
>
> #endif
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index abb526c915c3..8db0c474a769 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -1558,6 +1558,15 @@ struct kfd_ioctl_dbg_trap_args {
> };
> };
>
> +enum amdgpu_ptl_fmt {
> + AMDGPU_PTL_FMT_I8 = 0,
> + AMDGPU_PTL_FMT_F16 = 1,
> + AMDGPU_PTL_FMT_BF16 = 2,
> + AMDGPU_PTL_FMT_F32 = 3,
> + AMDGPU_PTL_FMT_F64 = 4,
> + AMDGPU_PTL_FMT_INVALID = 5,
> +};
> +
> #define KFD_IOC_PROFILER_VERSION_NUM 1
> enum kfd_profiler_ops {
> KFD_IOC_PROFILER_PMC = 0,
> --
> 2.34.1
>