On Tue, Sep 30, 2025 at 4:27 AM YiPeng Chai <[email protected]> wrote:
>
> Add amdgpu drm ras ioctl for ras module.

Please describe the IOCTL: how it is used and what functionality it
provides.  Additionally, please provide a link to the proposed open
source userspace tools that will use it.  We can't merge the kernel
code until we have the userspace side available.  Additional comments
below.

>
> V2:
>   Updated ras ioctl structure and description.
>
> V3:
>   Rename the ras command.
>
> V4:
>   Remove some variables.
>
> V5:
>   Add null pointer check.
>
> Signed-off-by: YiPeng Chai <[email protected]>
> Reviewed-by: Tao Zhou <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h           |  3 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c       |  1 +
>  .../gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c  | 44 +++++++++++++++++++
>  include/uapi/drm/amdgpu_drm.h                 | 32 ++++++++++++++
>  4 files changed, 80 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 2a0df4cabb99..55cd2f75333e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -718,6 +718,9 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void 
> *data, struct drm_file *fi
>  int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
>                                 struct drm_file *filp);
>
> +int amdgpu_ras_mgr_ioctl(struct drm_device *dev, void *data,
> +                               struct drm_file *filp);
> +
>  /* VRAM scratch page for HDP bug, default vram page */
>  struct amdgpu_mem_scratch {
>         struct amdgpu_bo                *robj;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4905efa63ddc..f7038ba8571e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -3057,6 +3057,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_SIGNAL, amdgpu_userq_signal_ioctl, 
> DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_USERQ_WAIT, amdgpu_userq_wait_ioctl, 
> DRM_AUTH|DRM_RENDER_ALLOW),
>         DRM_IOCTL_DEF_DRV(AMDGPU_GEM_LIST_HANDLES, 
> amdgpu_gem_list_handles_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
> +       DRM_IOCTL_DEF_DRV(AMDGPU_RAS, amdgpu_ras_mgr_ioctl, 
> DRM_AUTH|DRM_RENDER_ALLOW),

Is this interface for administrators only?  If so, you should either
remove DRM_RENDER_ALLOW, or check CAP_SYS_ADMIN (e.g. via
capable(CAP_SYS_ADMIN)) in the IOCTL.

Alex

>  };
>
>  static const struct drm_driver amdgpu_kms_driver = {
> diff --git a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c 
> b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
> index 195ca51a96d5..882b8ab7c843 100644
> --- a/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
> +++ b/drivers/gpu/drm/amd/ras/ras_mgr/amdgpu_ras_cmd.c
> @@ -361,3 +361,47 @@ int amdgpu_ras_cmd_ioctl_handler(struct ras_core_context 
> *ras_core,
>
>         return RAS_CMD__SUCCESS;
>  }
> +
> +static int amdgpu_ras_get_caps(struct amdgpu_device *adev,
> +                       struct drm_amdgpu_ras *cmd)
> +{
> +       void __user *output_ptr = u64_to_user_ptr(cmd->output_buf_ptr);
> +       struct drm_amdgpu_ras_caps caps;
> +
> +       memset(&caps, 0, sizeof(caps));
> +
> +       if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id)
> +               caps.oam_id = adev->smuio.funcs->get_socket_id(adev);
> +
> +       if (output_ptr && (cmd->output_buf_size >= sizeof(caps)) &&
> +           !copy_to_user(output_ptr, &caps, sizeof(caps)))
> +               return 0;
> +
> +       return -EINVAL;
> +}
> +
> +int amdgpu_ras_mgr_ioctl(struct drm_device *dev, void *data,
> +                               struct drm_file *filp)
> +{
> +       struct amdgpu_device *adev = drm_to_adev(dev);
> +       struct amdgpu_ras_mgr *ras_mgr = amdgpu_ras_mgr_get_context(adev);
> +       struct drm_amdgpu_ras *args = data;
> +       int res = RAS_CMD__ERROR_INVALID_CMD;
> +
> +       if (!ras_mgr || !ras_mgr->ras_core || !args)
> +               return -EPERM;
> +
> +       if (!ras_core_is_enabled(ras_mgr->ras_core))
> +               return RAS_CMD__ERROR_ACCESS_DENIED;
> +
> +       switch (args->cmd_id) {
> +       case AMDGPU_RAS_CMD_GET_CAPS:
> +               res = amdgpu_ras_get_caps(adev, args);
> +               break;
> +       default:
> +               res = RAS_CMD__ERROR_UKNOWN_CMD;
> +               break;
> +       }
> +
> +       return res;
> +}
> diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
> index cd7402e36b6d..71771755eca8 100644
> --- a/include/uapi/drm/amdgpu_drm.h
> +++ b/include/uapi/drm/amdgpu_drm.h
> @@ -59,6 +59,9 @@ extern "C" {
>  #define DRM_AMDGPU_USERQ_WAIT          0x18
>  #define DRM_AMDGPU_GEM_LIST_HANDLES    0x19
>
> +/* amdgpu ras ioctls */
> +#define DRM_AMDGPU_RAS                 0x5d
> +
>  #define DRM_IOCTL_AMDGPU_GEM_CREATE    DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_CREATE, union drm_amdgpu_gem_create)
>  #define DRM_IOCTL_AMDGPU_GEM_MMAP      DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_MMAP, union drm_amdgpu_gem_mmap)
>  #define DRM_IOCTL_AMDGPU_CTX           DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_CTX, union drm_amdgpu_ctx)
> @@ -79,6 +82,7 @@ extern "C" {
>  #define DRM_IOCTL_AMDGPU_USERQ_SIGNAL  DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_USERQ_SIGNAL, struct drm_amdgpu_userq_signal)
>  #define DRM_IOCTL_AMDGPU_USERQ_WAIT    DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_USERQ_WAIT, struct drm_amdgpu_userq_wait)
>  #define DRM_IOCTL_AMDGPU_GEM_LIST_HANDLES DRM_IOWR(DRM_COMMAND_BASE + 
> DRM_AMDGPU_GEM_LIST_HANDLES, struct drm_amdgpu_gem_list_handles)
> +#define DRM_IOCTL_AMDGPU_RAS   DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_RAS, 
> struct drm_amdgpu_ras)
>
>  /**
>   * DOC: memory domains
> @@ -1686,6 +1690,34 @@ struct drm_color_ctm_3x4 {
>         __u64 matrix[12];
>  };
>
> +/*
> + * Amdgpu ras command id
> + */
> +
> +/* Get amdgpu ras capabilities */
> +#define AMDGPU_RAS_CMD_GET_CAPS         0x01
> +
> +/* Input structure for amdgpu ras ioctl */
> +struct  drm_amdgpu_ras {
> +       /* Amdgpu ras version */
> +       __u32 version;
> +       /* AMDGPU_RAS_CMD_* */
> +       __u32 cmd_id;
> +       /* Pointer to input parameter buffer */
> +       __u64 input_buf_ptr;
> +       /* Pointer to output buffer */
> +       __u64 output_buf_ptr;
> +       /* Output buffer size */
> +       __u32 output_buf_size;
> +       __u32 pad;
> +};
> +
> +struct drm_amdgpu_ras_caps {
> +       /* OAM ID */
> +       __u32 oam_id;
> +       __u32 pad;
> +};
> +
>  #if defined(__cplusplus)
>  }
>  #endif
> --
> 2.34.1
>

Reply via email to