On Thu, Feb 5, 2026 at 2:17 AM Perry Yuan <[email protected]> wrote:
>
> Introduce a new IOCTL option to allow userspace explicit control over
> the Peak Tops Limiter (PTL) state for profiling.
>
> Signed-off-by: Perry Yuan <[email protected]>
> Reviewed-by: Yifan Zhang <[email protected]>

Please provide a link to the userspace that uses this new IOCTL interface.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h  |  2 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c  |  2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 89 +++++++++++++++++++++++-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  6 ++
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c |  5 +-
>  include/uapi/linux/kfd_ioctl.h           |  7 ++
>  6 files changed, 108 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> index 0a2f8d33a0ad..1af641ae9a02 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
> @@ -476,6 +476,8 @@ struct psp_context {
>         enum amdgpu_ptl_fmt             ptl_fmt2;
>         bool                            ptl_enabled;
>         bool                            ptl_hw_supported;
> +       /* PTL disable reference counting */
> +       atomic_t                        ptl_disable_ref;
>  };
>
>  struct amdgpu_psp_funcs {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> index d94bf21db281..472e2f41fed2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
> @@ -2395,6 +2395,8 @@ static int gfx_v9_4_3_perf_monitor_ptl_init(struct amdgpu_device *adev, bool sta
>
>         adev->psp.ptl_hw_supported = true;
>
> +       atomic_set(&adev->psp.ptl_disable_ref, 0);
> +
>         return 0;
>  }
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 285219781939..6457c5703f99 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1785,6 +1785,88 @@ int kfd_ptl_control(struct kfd_process_device *pdd, bool enable)
>         return ret;
>  }
>
> +int kfd_ptl_disable_request(struct kfd_process_device *pdd,
> +               struct kfd_process *p)
> +{
> +       struct amdgpu_device *adev;
> +       int ret = 0;
> +
> +       if (!pdd)
> +               return -ENODEV;
> +
> +       adev = pdd->dev->adev;
> +       mutex_lock(&p->mutex);
> +
> +       if (pdd->ptl_disable_req)
> +               goto out;
> +
> +       if (atomic_inc_return(&adev->psp.ptl_disable_ref) == 1) {
> +               ret = kfd_ptl_control(pdd, false);
> +               if (ret) {
> +                       atomic_dec(&adev->psp.ptl_disable_ref);
> +                       dev_warn(pdd->dev->adev->dev,
> +                                       "failed to disable PTL\n");
> +                       goto out;
> +               }
> +       }
> +       pdd->ptl_disable_req = true;
> +
> +out:
> +       mutex_unlock(&p->mutex);
> +       return ret;
> +}
> +
> +int kfd_ptl_disable_release(struct kfd_process_device *pdd,
> +               struct kfd_process *p)
> +{
> +       struct amdgpu_device *adev;
> +       int ret = 0;
> +
> +       if (!pdd)
> +               return -ENODEV;
> +
> +       adev = pdd->dev->adev;
> +       mutex_lock(&p->mutex);
> +       if (!pdd->ptl_disable_req)
> +               goto out;
> +
> +       if (atomic_dec_return(&adev->psp.ptl_disable_ref) == 0) {
> +               ret = kfd_ptl_control(pdd, true);
> +               if (ret) {
> +                       atomic_inc(&adev->psp.ptl_disable_ref);
> +                       dev_warn(pdd->dev->adev->dev,
> +                                       "failed to enable PTL\n");
> +                       goto out;
> +               }
> +       }
> +       pdd->ptl_disable_req = false;
> +
> +out:
> +       mutex_unlock(&p->mutex);
> +       return ret;
> +}
> +
> +static int kfd_profiler_ptl_control(struct kfd_process *p,
> +               struct kfd_ioctl_ptl_control *args)
> +{
> +       struct kfd_process_device *pdd;
> +       int ret;
> +
> +       mutex_lock(&p->mutex);
> +       pdd = kfd_process_device_data_by_id(p, args->gpu_id);
> +       mutex_unlock(&p->mutex);
> +
> +       if (!pdd)
> +               return -ENODEV;
> +
> +       if (args->enable == 0)
> +               ret = kfd_ptl_disable_request(pdd, p);
> +       else
> +               ret = kfd_ptl_disable_release(pdd, p);
> +
> +       return ret;
> +}
> +
>  static int criu_checkpoint_process(struct kfd_process *p,
>                              uint8_t __user *user_priv_data,
>                              uint64_t *priv_offset)
> @@ -3250,7 +3332,7 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
>                 if (!kfd->profiler_process) {
>                         kfd->profiler_process = p;
>                         status = 0;
> -                       kfd_ptl_control(pdd, false);
> +                       kfd_ptl_disable_request(pdd, p);
>                 } else if (kfd->profiler_process == p) {
>                         status = -EALREADY;
>                 } else {
> @@ -3259,7 +3341,8 @@ static inline uint32_t profile_lock_device(struct kfd_process *p,
>         } else if (op == 0 && kfd->profiler_process == p) {
>                 kfd->profiler_process = NULL;
>                 status = 0;
> -               kfd_ptl_control(pdd, true);
> +               kfd_ptl_disable_release(pdd, p);
> +
>         }
>         mutex_unlock(&kfd->profiler_lock);
>
> @@ -3302,6 +3385,8 @@ static int kfd_ioctl_profiler(struct file *filep, struct kfd_process *p, void *d
>                 return 0;
>         case KFD_IOC_PROFILER_PMC:
>                 return kfd_profiler_pmc(p, &args->pmc);
> +       case KFD_IOC_PROFILER_PTL_CONTROL:
> +               return kfd_profiler_ptl_control(p, &args->ptl);
>         }
>         return -EINVAL;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 164f69924a3d..48347065b9cd 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -872,6 +872,8 @@ struct kfd_process_device {
>         bool has_reset_queue;
>
>         u32 pasid;
> +       /* Indicates this process has requested PTL stay disabled */
> +       bool ptl_disable_req;
>  };
>
>  #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
> @@ -1609,6 +1611,10 @@ static inline bool kfd_is_first_node(struct kfd_node *node)
>
>  /* PTL support */
>  int kfd_ptl_control(struct kfd_process_device *pdd, bool enable);
> +int kfd_ptl_disable_request(struct kfd_process_device *pdd,
> +               struct kfd_process *p);
> +int kfd_ptl_disable_release(struct kfd_process_device *pdd,
> +               struct kfd_process *p);
>
>  /* Debugfs */
>  #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 5114ac4da5b9..882080dc4925 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1115,7 +1115,6 @@ static void kfd_process_profiler_release(struct kfd_process *p, struct kfd_proce
>         mutex_lock(&pdd->dev->kfd->profiler_lock);
>         if (pdd->dev->kfd->profiler_process == p) {
>                 pdd->qpd.dqm->ops.set_perfcount(pdd->qpd.dqm, 0);
> -               kfd_ptl_control(pdd, true);
>                 pdd->dev->kfd->profiler_process = NULL;
>         }
>         mutex_unlock(&pdd->dev->kfd->profiler_lock);
> @@ -1133,6 +1132,10 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
>                 pr_debug("Releasing pdd (topology id %d, for pid %d)\n",
>                         pdd->dev->id, p->lead_thread->pid);
>                 kfd_process_profiler_release(p, pdd);
> +
> +               if (pdd->ptl_disable_req)
> +                       kfd_ptl_disable_release(pdd, p);
> +
>                 kfd_process_device_destroy_cwsr_dgpu(pdd);
>                 kfd_process_device_destroy_ib_mem(pdd);
>
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index 8db0c474a769..d48c407e9ee5 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -1572,6 +1572,7 @@ enum kfd_profiler_ops {
>         KFD_IOC_PROFILER_PMC = 0,
>         KFD_IOC_PROFILER_PC_SAMPLE = 1,
>         KFD_IOC_PROFILER_VERSION = 2,
> +       KFD_IOC_PROFILER_PTL_CONTROL = 3,
>  };
>
>  /**
> @@ -1583,10 +1584,16 @@ struct kfd_ioctl_pmc_settings {
>         __u32 perfcount_enable;   /* Force Perfcount Enable for queues on GPU */
>  };
>
> +struct kfd_ioctl_ptl_control {
> +       __u32 gpu_id; /* user_gpu_id */
> +       __u32 enable; /* set 1 to enable PTL, set 0 to disable PTL */
> +};
> +
>  struct kfd_ioctl_profiler_args {
>         __u32 op;                                               /* kfd_profiler_op */
>         union {
>                 struct kfd_ioctl_pmc_settings  pmc;
> +               struct kfd_ioctl_ptl_control   ptl;
>                 __u32 version;                          /* KFD_IOC_PROFILER_VERSION_NUM */
>         };
>  };
> --
> 2.34.1
>

Reply via email to