This patch implements full Streaming Performance Monitor (SPM) support for
AMD GPUs, enabling real-time performance counter data collection directly
to userspace buffers.

The implementation provides three key operations:
- AMDGPU_SPM_OP_ACQUIRE: Acquire exclusive access to SPM hardware resources
- AMDGPU_SPM_OP_RELEASE: Release SPM hardware for use by other processes
- AMDGPU_SPM_OP_SET_DEST_BUF: Configure destination buffer and manage
  counter data streaming

Key features:
- Flexible destination buffer management with configurable timeout behavior
- Automatic detection and reporting of data loss due to ring buffer overflow
- Support for partial buffer fills with explicit data size reporting
- Detailed kernel API documentation with operation semantics

The amdgpu_spm_setdestbuff() function allows profiling tools to efficiently
collect performance data from the GPU. The timeout mechanism enables waiting
for a buffer to fill completely before switching to a new one, or immediate
switching with partial data preservation.

This enables performance analysis tools and profiling frameworks to access
real-time GPU performance metrics without kernel-mode overhead.

Signed-off-by: James Zhu <[email protected]>
---
 amdgpu/amdgpu-symbols.txt |  3 ++
 amdgpu/amdgpu.h           | 38 ++++++++++++++++
 amdgpu/amdgpu_profiler.c  | 68 +++++++++++++++++++++++++++++
 include/drm/amdgpu_drm.h  | 92 ++++++++++++++++++++++++++++++++++++++-
 4 files changed, 200 insertions(+), 1 deletion(-)

diff --git a/amdgpu/amdgpu-symbols.txt b/amdgpu/amdgpu-symbols.txt
index 8cd5559c..b33958ef 100644
--- a/amdgpu/amdgpu-symbols.txt
+++ b/amdgpu/amdgpu-symbols.txt
@@ -88,3 +88,6 @@ amdgpu_create_userqueue
 amdgpu_free_userqueue
 amdgpu_userq_signal
 amdgpu_userq_wait
+amdgpu_spm_acquire
+amdgpu_spm_release
+amdgpu_spm_setdestbuff
diff --git a/amdgpu/amdgpu.h b/amdgpu/amdgpu.h
index 4ec1f6b6..8698f26a 100644
--- a/amdgpu/amdgpu.h
+++ b/amdgpu/amdgpu.h
@@ -2129,7 +2129,45 @@ int amdgpu_cwsr_set_l2_trap_handler(amdgpu_device_handle dev,
  */
 int amdgpu_profiler_version(amdgpu_device_handle dev);
 
+/**
+ * Request exclusive use of SPM
+ * \param   dev               - \c [in]     device handle
+ *
+ * \return  0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_acquire(amdgpu_device_handle dev);
+
+/**
+ * Release exclusive use of SPM
+ * \param   dev               - \c [in]     device handle
+ * \return  0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_release(amdgpu_device_handle dev);
+
+/**
+ *  Set up the destination user mode buffer for stream performance
+ *  counter data.
+ * \param   dev               - \c [in]     device handle
+ * \param   size_in_bytes     - \c [in]     size of the buffer
+ * \param   timeout           - \c [in/out] timeout in milliseconds; updated with time left
+ * \param   size_copied       - \c [out]    number of bytes copied
+ * \param   dest_mem_addr     - \c [in]     destination address. Set to NULL
+ *                                          to stop copy on previous buffer
+ * \param   is_spm_data_loss  - \c [out]    true if data was lost
+ *
+ * \return  0 on success otherwise POSIX Error code
+ */
+int amdgpu_spm_setdestbuff(
+                       amdgpu_device_handle dev,
+                       uint32_t             size_in_bytes,
+                       uint32_t             *timeout,
+                       uint32_t             *size_copied,
+                       void                 *dest_mem_addr,
+                       bool                 *is_spm_data_loss
+       );
+
 #ifdef __cplusplus
 }
+
 #endif
 #endif /* #ifdef _AMDGPU_H_ */
diff --git a/amdgpu/amdgpu_profiler.c b/amdgpu/amdgpu_profiler.c
index 8d4dffe4..e9d30fb6 100644
--- a/amdgpu/amdgpu_profiler.c
+++ b/amdgpu/amdgpu_profiler.c
@@ -44,3 +44,71 @@ amdgpu_profiler_version(amdgpu_device_handle dev)
 
        return ret;
 }
+
+drm_public int
+amdgpu_spm_acquire(amdgpu_device_handle dev)
+{
+       int ret;
+       struct drm_amdgpu_profiler_args user_arg;
+
+       if (!dev) /* dev->fd is read below */
+               return -EINVAL;
+       /* Build a zeroed profiler request selecting SPM -> ACQUIRE. */
+       memset(&user_arg, 0, sizeof(user_arg));
+       user_arg.op = AMDGPU_PROFILER_SPM;
+       user_arg.spm.op = AMDGPU_SPM_OP_ACQUIRE;
+       /* Submit via the DRM_AMDGPU_PROFILER ioctl; return its status unchanged. */
+       ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+                                 &user_arg, sizeof(user_arg));
+
+       return ret;
+}
+
+drm_public int
+amdgpu_spm_release(amdgpu_device_handle dev)
+{
+       struct drm_amdgpu_profiler_args user_arg;
+
+       if (!dev) /* dev->fd is read below */
+               return -EINVAL;
+       /* Build a zeroed profiler request selecting SPM -> RELEASE. */
+       memset(&user_arg, 0, sizeof(user_arg));
+       user_arg.op = AMDGPU_PROFILER_SPM;
+       user_arg.spm.op = AMDGPU_SPM_OP_RELEASE;
+       /* Submit via the DRM_AMDGPU_PROFILER ioctl; return its status unchanged. */
+       return drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+                                  &user_arg, sizeof(user_arg));
+}
+
+drm_public int
+amdgpu_spm_setdestbuff(
+               amdgpu_device_handle dev,
+               uint32_t             size_in_bytes,
+               uint32_t             *timeout,
+               uint32_t             *size_copied,
+               void                 *dest_mem_addr,
+               bool                 *is_spm_data_loss
+       )
+{
+       int ret;
+       struct drm_amdgpu_profiler_args user_arg;
+
+       if (!dev || !timeout || !size_copied || !is_spm_data_loss)
+               return -EINVAL; /* every pointer checked here is dereferenced below */
+       /* dest_mem_addr may be NULL; widen it via uintptr_t so 32-bit builds are clean. */
+       memset(&user_arg, 0, sizeof(user_arg));
+       user_arg.op = AMDGPU_PROFILER_SPM;
+       user_arg.spm.op = AMDGPU_SPM_OP_SET_DEST_BUF;
+       user_arg.spm.timeout = *timeout;
+       user_arg.spm.dest_buf = (uint64_t)(uintptr_t)dest_mem_addr;
+       user_arg.spm.buf_size = size_in_bytes;
+
+       ret = drmCommandWriteRead(dev->fd, DRM_AMDGPU_PROFILER,
+                                  &user_arg, sizeof(user_arg));
+       /* As before, the outputs are stored whether or not the ioctl succeeded. */
+       *size_copied = user_arg.spm.bytes_copied;
+       *is_spm_data_loss = user_arg.spm.has_data_loss != 0;
+       *timeout = user_arg.spm.timeout;
+
+       return ret;
+}
diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
index 307242ac..60c73233 100644
--- a/include/drm/amdgpu_drm.h
+++ b/include/drm/amdgpu_drm.h
@@ -1698,10 +1698,99 @@ struct drm_amdgpu_info_gpuvm_fault {
 #define AMDGPU_FAMILY_GC_12_0_0                        152 /* GC 12.0.0 */
 
 /*
- * Supported Profiler Operations
+ * Supported SPM (Stream Performance Monitor) Operations
  */
+/**
+ * drm_amdgpu_spm_op - SPM ioctl operations
+ *
+ * @AMDGPU_SPM_OP_ACQUIRE: acquire exclusive access to SPM
+ * @AMDGPU_SPM_OP_RELEASE: release exclusive access to SPM
+ * @AMDGPU_SPM_OP_SET_DEST_BUF: set or unset destination buffer for SPM streaming
+ */
+enum drm_amdgpu_spm_op {
+       AMDGPU_SPM_OP_ACQUIRE, /* implicit value 0 */
+       AMDGPU_SPM_OP_RELEASE, /* implicit value 1 */
+       AMDGPU_SPM_OP_SET_DEST_BUF /* implicit value 2 */
+};
+
+/**
+ * drm_amdgpu_spm_args - Arguments for SPM ioctl
+ *
+ * @op[in]:            specifies the operation to perform
+ * @dest_buf[in]:      used for the address of the destination buffer
+ *                      in @AMDGPU_SPM_OP_SET_DEST_BUF
+ * @buf_size[in]:      size of the destination buffer
+ * @timeout[in/out]:   [in]: timeout in milliseconds, [out]: amount of time left
+ *                      in the timeout window
+ * @bytes_copied[out]: total amount of data that was copied to the previous dest_buf
+ * @has_data_loss[out]: total count of sub-blocks that lost data
+ *
+ * This ioctl performs different functions depending on the @op parameter.
+ *
+ * AMDGPU_SPM_OP_ACQUIRE
+ * ------------------------
+ *
+ * Acquires exclusive access to SPM on the specified device for the calling process.
+ * This must be called before using AMDGPU_SPM_OP_SET_DEST_BUF.
+ *
+ * AMDGPU_SPM_OP_RELEASE
+ * ------------------------
+ *
+ * Releases exclusive access to SPM on the specified device for the calling process,
+ * which allows another process to acquire it in the future.
+ *
+ * AMDGPU_SPM_OP_SET_DEST_BUF
+ * -----------------------------
+ *
+ * If @dest_buf is NULL, the destination buffer address is unset and copying of counters
+ * is stopped.
+ *
+ * If @dest_buf is not NULL, it specifies the pointer to a new destination buffer.
+ * @buf_size specifies the size of the buffer.
+ *
+ * If @timeout is non-0, the call will wait for up to @timeout ms for the previous
+ * buffer to be filled. If the previous buffer is filled before the timeout expires,
+ * @timeout is updated with the time remaining. If the timeout is exceeded, the function
+ * copies any partial data available into the previous user buffer and returns success.
+ * The amount of valid data in the previous user buffer is indicated by @bytes_copied.
+ *
+ * If @timeout is 0, the function immediately replaces the previous destination buffer
+ * without waiting for the previous buffer to be filled. That means the previous buffer
+ * may only be partially filled, and @bytes_copied will indicate how much data has been
+ * copied to it.
+ *
+ * If data was lost, e.g. due to a ring buffer overflow, @has_data_loss will be non-0.
+ *
+ * Returns negative error code on failure, 0 on success.
+ */
+struct drm_amdgpu_spm_args {
+       __u64 dest_buf;
+       __u32 buf_size;
+       __u32 op;
+       __u32 timeout;
+       __u32 bytes_copied;
+       __u32 has_data_loss;
+       __u32 pad; /* explicit padding: brings the struct to 32 bytes */
+};
+
+/**
+ * drm_amdgpu_spm_buffer_header - SPM Buffer header for drm_amdgpu_spm_args->dest_buf
+ *
+ * @version        [out]: spm version
+ * @bytes_copied   [out]: amount of data for each sub-block
+ * @has_data_loss  [out]: boolean indicating whether data was lost for each sub-block
+ *                        (e.g. due to a ring-buffer overflow)
+ */
+struct drm_amdgpu_spm_buffer_header {
+       __u32 version; /* 0-23: minor 24-31: major */
+       __u32 bytes_copied;
+       __u32 has_data_loss;
+       __u32 reserved[5]; /* brings the header to 8 x __u32 = 32 bytes */
+};
+
 enum drm_amdgpu_profiler_ops {
        AMDGPU_PROFILER_VERSION = 0,
+       AMDGPU_PROFILER_SPM,
 };
 
 struct drm_amdgpu_profiler_args {
@@ -1711,6 +1800,7 @@ struct drm_amdgpu_profiler_args {
                                          * lower 16 bit: minor
                                          * higher 16 bit: major
                                          */
+           struct drm_amdgpu_spm_args spm;
        };
 };
 
-- 
2.34.1

Reply via email to