On Thu, Feb 5, 2026 at 2:17 AM Perry Yuan <[email protected]> wrote:
>
> Introduce a new IOCTL option to give userspace explicit control over
> the Peak Tops Limiter (PTL) state for profiling.
>
> Signed-off-by: Perry Yuan <[email protected]>
> Reviewed-by: Yifan Zhang <[email protected]>

Please provide a link to the userspace that uses this new IOCTL interface.
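
For reference, I'd expect the userspace side to end up looking roughly
like the sketch below, based on the UAPI added in this patch (the
AMDKFD_IOC_PROFILER request number and the gpu_id value here are
assumptions on my part); a pointer to the real tool is what we need in
the commit message:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kfd_ioctl.h>

/* Ask the driver to keep PTL off for one GPU (enable = 0), or drop
 * that request again (enable = 1).
 */
static int kfd_ptl_control_user(int kfd_fd, __u32 gpu_id, __u32 enable)
{
        struct kfd_ioctl_profiler_args args;

        memset(&args, 0, sizeof(args));
        args.op = KFD_IOC_PROFILER_PTL_CONTROL;
        args.ptl.gpu_id = gpu_id;       /* user_gpu_id from topology */
        args.ptl.enable = enable;

        /* AMDKFD_IOC_PROFILER is assumed to be the profiler ioctl request */
        return ioctl(kfd_fd, AMDKFD_IOC_PROFILER, &args);
}

int main(void)
{
        int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
        __u32 gpu_id = 0x1002;          /* hypothetical user_gpu_id */

        if (fd < 0)
                return 1;
        if (kfd_ptl_control_user(fd, gpu_id, 0))
                perror("PTL disable request");
        /* ... run the profiling session ... */
        if (kfd_ptl_control_user(fd, gpu_id, 1))
                perror("PTL disable release");
        close(fd);
        return 0;
}
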
Alex
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h  |  2 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 89 +++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |  5 +-
>  include/uapi/linux/kfd_ioctl.h           |  7 ++
> 6 files changed, 108 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 0a2f8d33a0ad..1af641ae9a02 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -476,6 +476,8 @@ struct psp_context {
> enum amdgpu_ptl_fmt ptl_fmt2;
> bool ptl_enabled;
> bool ptl_hw_supported;
> + /* PTL disable reference counting */
> + atomic_t ptl_disable_ref;
> };
>
> struct amdgpu_psp_funcs {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index d94bf21db281..472e2f41fed2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -2395,6 +2395,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool sta
>
> adev->psp.ptl_hw_supported = true;
>
> + atomic_set(&adev->psp.ptl_disable_ref, 0);
> +
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 285219781939..6457c5703f99 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1785,6 +1785,88 @@ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
> return ret;
> }
>
> +int kfd_ptl_disable_request(struct kfd_process_device *pdd,
> + struct kfd_process *p)
> +{
> + struct amdgpu_device *adev;
> + int ret = 0;
> +
> + if (!pdd)
> + return -ENODEV;
> +
> + adev = pdd->dev->adev;
> + mutex_lock(&p->mutex);
> +
> + if (pdd->ptl_disable_req)
> + goto out;
> +
> + if (atomic_inc_return(&adev->psp.ptl_disable_ref) == 1) {
> + ret = kfd_ptl_control(pdd, false);
> + if (ret) {
> + atomic_dec(&adev->psp.ptl_disable_ref);
> + dev_warn(pdd->dev->adev->dev,
> + "failed to disable PTL\n");
> + goto out;
> + }
> + }
> + pdd->ptl_disable_req = true;
> +
> +out:
> + mutex_unlock(&p->mutex);
> + return ret;
> +}
> +
> +int kfd_ptl_disable_release(struct kfd_process_device *pdd,
> + struct kfd_process *p)
> +{
> + struct amdgpu_device *adev;
> + int ret = 0;
> +
> + if (!pdd)
> + return -ENODEV;
> +
> + adev = pdd->dev->adev;
> + mutex_lock(&p->mutex);
> + if (!pdd->ptl_disable_req)
> + goto out;
> +
> + if (atomic_dec_return(&adev->psp.ptl_disable_ref) == 0) {
> + ret = kfd_ptl_control(pdd, true);
> + if (ret) {
> + atomic_inc(&adev->psp.ptl_disable_ref);
> + dev_warn(pdd->dev->adev->dev,
> + "failed to enable PTL\n");
> + goto out;
> + }
> + }
> + pdd->ptl_disable_req = false;
> +
> +out:
> + mutex_unlock(&p->mutex);
> + return ret;
> +}
> +
> +static int kfd_profiler_ptl_control(struct kfd_process *p,
> + struct kfd_ioctl_ptl_control *args)
> +{
> + struct kfd_process_device *pdd;
> + int ret;
> +
> + mutex_lock(&p->mutex);
> + pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> + mutex_unlock(&p->mutex);
> +
> + if (!pdd)
> + return -ENODEV;
> +
> + if (args->enable == 0)
> + ret = kfd_ptl_disable_request(pdd, p);
> + else
> + ret = kfd_ptl_disable_release(pdd, p);
> +
> + return ret;
> +}
> +
> static int criu_checkpoint_process(struct kfd_process *p,
> uint8_t __user *user_priv_data,
> uint64_t *priv_offset)
> @@ -3250,7 +3332,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
> if (!kfd->profiler_process) {
> kfd->profiler_process = p;
> status = 0;
> - kfd_ptl_control(pdd, false);
> + kfd_ptl_disable_request(pdd, p);
> } else if (kfd->profiler_process == p) {
> status = -EALREADY;
> } else {
> @@ -3259,7 +3341,8 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
> } else if (op == 0 && kfd->profiler_process == p) {
> kfd->profiler_process = NULL;
> status = 0;
> - kfd_ptl_control(pdd, true);
> + kfd_ptl_disable_release(pdd, p);
> +
> }
> mutex_unlock(&kfd->profiler_lock);
>
> @@ -3302,6 +3385,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
> return 0;
> case KFD_IOC_PROFILER_PMC:
> return kfd_profiler_pmc(p, &args->pmc);
> + case KFD_IOC_PROFILER_PTL_CONTROL:
> + return kfd_profiler_ptl_control(p, &args->ptl);
> }
> return -EINVAL;
> }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 164f69924a3d..48347065b9cd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -872,6 +872,8 @@ struct kfd_process_device {
> bool has_reset_queue;
>
> u32 pasid;
> + /* Indicates this process has requested PTL stay disabled */
> + bool ptl_disable_req;
> };
>
> #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -1609,6 +1611,10 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
>
> /* PTL support */
> int kfd_ptl_control(struct kfd_process_device *pdd, bool enable);
> +int kfd_ptl_disable_request(struct kfd_process_device *pdd,
> + struct kfd_process *p);
> +int kfd_ptl_disable_release(struct kfd_process_device *pdd,
> + struct kfd_process *p);
>
> /* Debugfs */
> #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 5114ac4da5b9..882080dc4925 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1115,7 +1115,6 @@ static void kfd_process_profiler_release(struct kfd_process *p, struct kfd_proce
> mutex_lock(&pdd->dev->kfd->profiler_lock);
> if (pdd->dev->kfd->profiler_process == p) {
> pdd->qpd.dqm->ops.set_perfcount(pdd->qpd.dqm, 0);
> - kfd_ptl_control(pdd, true);
> pdd->dev->kfd->profiler_process = NULL;
> }
> mutex_unlock(&pdd->dev->kfd->profiler_lock);
> @@ -1133,6 +1132,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
> pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
> pdd->dev->id, p->lead_thread->pid);
> kfd_process_profiler_release(p, pdd);
> +
> + if (pdd->ptl_disable_req)
> + kfd_ptl_disable_release(pdd, p);
> +
> kfd_process_device_destroy_cwsr_dgpu(pdd);
> kfd_process_device_destroy_ib_mem(pdd);
>
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 8db0c474a769..d48c407e9ee5 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -1572,6 +1572,7 @@ enum kfd_profiler_ops {
> KFD_IOC_PROFILER_PMC = 0,
> KFD_IOC_PROFILER_PC_SAMPLE = 1,
> KFD_IOC_PROFILER_VERSION = 2,
> + KFD_IOC_PROFILER_PTL_CONTROL = 3,
> };
>
> /**
> @@ -1583,10 +1584,16 @@ struct kfd_ioctl_pmc_settings {
> __u32 perfcount_enable; /* Force Perfcount Enable for queues on GPU */
> };
>
> +struct kfd_ioctl_ptl_control {
> + __u32 gpu_id; /* user_gpu_id */
> + __u32 enable; /* set 1 to enable PTL, set 0 to disable PTL */
> +};
> +
> struct kfd_ioctl_profiler_args {
> __u32 op; /* kfd_profiler_op */
> union {
> struct kfd_ioctl_pmc_settings pmc;
> + struct kfd_ioctl_ptl_control ptl;
> __u32 version; /* KFD_IOC_PROFILER_VERSION_NUM */
> };
> };
> --
> 2.34.1
>