This change adds the IOCTL to query data about the performance counter
setup. Some of this data was available via previous DEV_QUERY calls,
for instance for GPU info, but exposing it via PERF_INFO
minimizes the overhead of creating a single session to just the one
aggregate IOCTL.

Signed-off-by: Lukas Zapolskas <[email protected]>
Reviewed-by: Adrián Larumbe <[email protected]>
---
 drivers/gpu/drm/panthor/Makefile         |  1 +
 drivers/gpu/drm/panthor/panthor_device.c |  7 ++
 drivers/gpu/drm/panthor/panthor_device.h |  3 +
 drivers/gpu/drm/panthor/panthor_drv.c    | 10 ++-
 drivers/gpu/drm/panthor/panthor_fw.h     |  3 +
 drivers/gpu/drm/panthor/panthor_perf.c   | 89 ++++++++++++++++++++++++
 drivers/gpu/drm/panthor/panthor_perf.h   | 15 ++++
 drivers/gpu/drm/panthor/panthor_regs.h   |  3 +
 8 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/panthor/panthor_perf.c
 create mode 100644 drivers/gpu/drm/panthor/panthor_perf.h

diff --git a/drivers/gpu/drm/panthor/Makefile b/drivers/gpu/drm/panthor/Makefile
index 753a32c446df..824e5d6e9e3d 100644
--- a/drivers/gpu/drm/panthor/Makefile
+++ b/drivers/gpu/drm/panthor/Makefile
@@ -10,6 +10,7 @@ panthor-y := \
        panthor_heap.o \
        panthor_hw.o \
        panthor_mmu.o \
+       panthor_perf.o \
        panthor_pwr.o \
        panthor_sched.o
 
diff --git a/drivers/gpu/drm/panthor/panthor_device.c 
b/drivers/gpu/drm/panthor/panthor_device.c
index 54fbb1aa07c5..dc237da92340 100644
--- a/drivers/gpu/drm/panthor/panthor_device.c
+++ b/drivers/gpu/drm/panthor/panthor_device.c
@@ -22,6 +22,7 @@
 #include "panthor_gpu.h"
 #include "panthor_hw.h"
 #include "panthor_mmu.h"
+#include "panthor_perf.h"
 #include "panthor_pwr.h"
 #include "panthor_regs.h"
 #include "panthor_sched.h"
@@ -305,6 +306,10 @@ int panthor_device_init(struct panthor_device *ptdev)
 
        panthor_gem_init(ptdev);
 
+       ret = panthor_perf_init(ptdev);
+       if (ret)
+               goto err_unplug_sched;
+
        /* ~3 frames */
        pm_runtime_set_autosuspend_delay(ptdev->base.dev, 50);
        pm_runtime_use_autosuspend(ptdev->base.dev);
@@ -318,6 +323,8 @@ int panthor_device_init(struct panthor_device *ptdev)
 
 err_disable_autosuspend:
        pm_runtime_dont_use_autosuspend(ptdev->base.dev);
+
+err_unplug_sched:
        panthor_sched_unplug(ptdev);
 
 err_unplug_fw:
diff --git a/drivers/gpu/drm/panthor/panthor_device.h 
b/drivers/gpu/drm/panthor/panthor_device.h
index f35e52b9546a..64b0048de6ac 100644
--- a/drivers/gpu/drm/panthor/panthor_device.h
+++ b/drivers/gpu/drm/panthor/panthor_device.h
@@ -142,6 +142,9 @@ struct panthor_device {
        /** @pwr: Power control management data. */
        struct panthor_pwr *pwr;
 
+       /** @perf_info: Performance counter interface information. */
+       struct drm_panthor_perf_info perf_info;
+
        /** @gpu: GPU management data. */
        struct panthor_gpu *gpu;
 
diff --git a/drivers/gpu/drm/panthor/panthor_drv.c 
b/drivers/gpu/drm/panthor/panthor_drv.c
index 98d4e8d867ed..f9e01c42d237 100644
--- a/drivers/gpu/drm/panthor/panthor_drv.c
+++ b/drivers/gpu/drm/panthor/panthor_drv.c
@@ -178,7 +178,8 @@ panthor_get_uobj_array(const struct drm_panthor_obj_array 
*in, u32 min_stride,
                 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_submit, syncs), \
                 PANTHOR_UOBJ_DECL(struct drm_panthor_queue_create, 
ringbuf_size), \
                 PANTHOR_UOBJ_DECL(struct drm_panthor_vm_bind_op, syncs), \
-                PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size))
+                PANTHOR_UOBJ_DECL(struct drm_panthor_bo_sync_op, size), \
+                PANTHOR_UOBJ_DECL(struct drm_panthor_perf_info, shader_blocks))
 
 /**
  * PANTHOR_UOBJ_SET() - Copy a kernel object to a user object.
@@ -838,6 +839,10 @@ static int panthor_ioctl_dev_query(struct drm_device 
*ddev, void *data, struct d
                        args->size = sizeof(priorities_info);
                        return 0;
 
+               case DRM_PANTHOR_DEV_QUERY_PERF_INFO:
+                       args->size = sizeof(ptdev->perf_info);
+                       return 0;
+
                default:
                        return -EINVAL;
                }
@@ -862,6 +867,9 @@ static int panthor_ioctl_dev_query(struct drm_device *ddev, 
void *data, struct d
                panthor_query_group_priorities_info(file, &priorities_info);
                return PANTHOR_UOBJ_SET(args->pointer, args->size, 
priorities_info);
 
+       case DRM_PANTHOR_DEV_QUERY_PERF_INFO:
+               return PANTHOR_UOBJ_SET(args->pointer, args->size, 
ptdev->perf_info);
+
        default:
                return -EINVAL;
        }
diff --git a/drivers/gpu/drm/panthor/panthor_fw.h 
b/drivers/gpu/drm/panthor/panthor_fw.h
index fbdc21469ba3..dae78618a080 100644
--- a/drivers/gpu/drm/panthor/panthor_fw.h
+++ b/drivers/gpu/drm/panthor/panthor_fw.h
@@ -198,8 +198,11 @@ struct panthor_fw_global_control_iface {
        u32 output_va;
        u32 group_num;
        u32 group_stride;
+#define GLB_PERFCNT_FW_SIZE(x) ((((x) >> 16) << 8))
        u32 perfcnt_size;
        u32 instr_features;
+#define PERFCNT_FEATURES_MD_SIZE(x) (((x) & GENMASK(3, 0)) << 8)
+       u32 perfcnt_features;
 };
 
 struct panthor_fw_global_input_iface {
diff --git a/drivers/gpu/drm/panthor/panthor_perf.c 
b/drivers/gpu/drm/panthor/panthor_perf.c
new file mode 100644
index 000000000000..842d62826ac3
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_perf.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0 or MIT
+/* Copyright 2023 Collabora Ltd */
+/* Copyright 2025 Arm ltd. */
+
+#include <linux/bitops.h>
+#include <drm/panthor_drm.h>
+
+#include "panthor_device.h"
+#include "panthor_fw.h"
+#include "panthor_perf.h"
+#include "panthor_regs.h"
+
+/**
+ * PANTHOR_PERF_COUNTERS_PER_BLOCK - On CSF architectures pre-11.x, the number 
of counters
+ * per block was hardcoded to be 64. Arch 11.0 onwards supports the 
PRFCNT_FEATURES GPU register,
+ * which indicates the same information.
+ */
+#define PANTHOR_PERF_COUNTERS_PER_BLOCK (64)
+
+/**
+ * PANTHOR_HW_COUNTER_SIZE - The size of a hardware counter in the FW ring 
buffer.
+ */
+#define PANTHOR_HW_COUNTER_SIZE (sizeof(u32))
+
+struct panthor_perf_counter_block {
+       struct drm_panthor_perf_block_header header;
+       u64 counters[];
+};
+
+static size_t get_annotated_block_size(size_t counters_per_block)
+{
+       return struct_size_t(struct panthor_perf_counter_block, counters, 
counters_per_block);
+}
+
+static size_t session_get_user_sample_size(const struct drm_panthor_perf_info 
*const info)
+{
+       const size_t block_size = 
get_annotated_block_size(info->counters_per_block);
+       const size_t block_nr = info->cshw_blocks + info->fw_blocks +
+               info->tiler_blocks + info->memsys_blocks + info->shader_blocks;
+
+       return info->sample_header_size + (block_size * block_nr);
+}
+
+static void panthor_perf_info_init(struct panthor_device *const ptdev)
+{
+       struct panthor_fw_global_iface *glb_iface = 
panthor_fw_get_glb_iface(ptdev);
+       struct drm_panthor_perf_info *const perf_info = &ptdev->perf_info;
+
+       if (PERFCNT_FEATURES_MD_SIZE(glb_iface->control->perfcnt_features))
+               perf_info->flags |= DRM_PANTHOR_PERF_BLOCK_STATES_SUPPORT;
+
+       if (GPU_ARCH_MAJOR(ptdev->gpu_info.gpu_id) < 11) {
+               perf_info->counters_per_block = PANTHOR_PERF_COUNTERS_PER_BLOCK;
+       } else {
+               u32 features = gpu_read(ptdev, GPU_PRFCNT_FEATURES);
+
+               perf_info->counters_per_block = 
PRFCNT_FEATURES_COUNTER_BLOCK_SIZE(features) /
+                       PANTHOR_HW_COUNTER_SIZE;
+       }
+
+       perf_info->sample_header_size = sizeof(struct 
drm_panthor_perf_sample_header);
+       perf_info->block_header_size = sizeof(struct 
drm_panthor_perf_block_header);
+
+       if (GLB_PERFCNT_FW_SIZE(glb_iface->control->perfcnt_size))
+               perf_info->fw_blocks = 1;
+
+       perf_info->cshw_blocks = 1;
+       perf_info->tiler_blocks = 1;
+       perf_info->memsys_blocks = 
GPU_MEM_FEATURES_L2_SLICES(ptdev->gpu_info.mem_features);
+       perf_info->shader_blocks = hweight64(ptdev->gpu_info.shader_present);
+
+       perf_info->sample_size = session_get_user_sample_size(perf_info);
+}
+
+/**
+ * panthor_perf_init - Initialize the performance counter subsystem.
+ * @ptdev: Panthor device
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int panthor_perf_init(struct panthor_device *ptdev)
+{
+       if (!ptdev)
+               return -EINVAL;
+
+       panthor_perf_info_init(ptdev);
+
+       return 0;
+}
diff --git a/drivers/gpu/drm/panthor/panthor_perf.h 
b/drivers/gpu/drm/panthor/panthor_perf.h
new file mode 100644
index 000000000000..3c32c24c164c
--- /dev/null
+++ b/drivers/gpu/drm/panthor/panthor_perf.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 or MIT */
+/* Copyright 2025 Collabora Ltd */
+/* Copyright 2025 Arm ltd. */
+
+#ifndef __PANTHOR_PERF_H__
+#define __PANTHOR_PERF_H__
+
+#include <linux/types.h>
+
+struct panthor_device;
+
+int panthor_perf_init(struct panthor_device *ptdev);
+
+#endif /* __PANTHOR_PERF_H__ */
+
diff --git a/drivers/gpu/drm/panthor/panthor_regs.h 
b/drivers/gpu/drm/panthor/panthor_regs.h
index 08bf06c452d6..9e66c207b9fb 100644
--- a/drivers/gpu/drm/panthor/panthor_regs.h
+++ b/drivers/gpu/drm/panthor/panthor_regs.h
@@ -27,6 +27,7 @@
 #define GPU_TILER_FEATURES                             0xC
 #define GPU_MEM_FEATURES                               0x10
 #define   GROUPS_L2_COHERENT                           BIT(0)
+#define   GPU_MEM_FEATURES_L2_SLICES(x)                        ((((x) & 
GENMASK(11, 8)) >> 8) + 1)
 
 #define GPU_MMU_FEATURES                               0x14
 #define  GPU_MMU_FEATURES_VA_BITS(x)                   ((x) & GENMASK(7, 0))
@@ -75,6 +76,8 @@
 #define GPU_FEATURES                                   0x60
 #define   GPU_FEATURES_RAY_INTERSECTION                        BIT(2)
 #define   GPU_FEATURES_RAY_TRAVERSAL                   BIT(5)
+#define GPU_PRFCNT_FEATURES                            0x68
+#define   PRFCNT_FEATURES_COUNTER_BLOCK_SIZE(x)                (((x) & 
GENMASK(7, 0)) << 8)
 
 #define GPU_TIMESTAMP_OFFSET                           0x88
 #define GPU_CYCLE_COUNT                                        0x90
-- 
2.33.0.dirty

Reply via email to