i915 RFC PMU:
* https://patchwork.freedesktop.org/series/27488/
* https://patchwork.freedesktop.org/series/28842/

Tests:
* init: try to initialize all possible metrics exposed in i915 PMU
  (limit to 0-instance of engines)
* invalid_init: verify that i915 PMU correctly errors out on invalid
  initialization
* single: verify that BUSY metrics work for each engine
* parallel: verify that parallel requests for metrics do not conflict

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozh...@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursu...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
---
 tests/Makefile.sources |   1 +
 tests/perf_pmu.c       | 546 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 547 insertions(+)
 create mode 100644 tests/perf_pmu.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index bb013c7..51b684b 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -215,6 +215,7 @@ TESTS_progs = \
        kms_vblank \
        meta_test \
        perf \
+       perf_pmu \
        pm_backlight \
        pm_lpsp \
        pm_rc6_residency \
diff --git a/tests/perf_pmu.c b/tests/perf_pmu.c
new file mode 100644
index 0000000..0d025a6
--- /dev/null
+++ b/tests/perf_pmu.c
@@ -0,0 +1,546 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_sysfs.h"
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/time.h>
+#include <time.h>
+#include "drm.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+////////////////////////////////////////////////////////////////////////
+// This is a copy of perf.h from intel-gpu-tools/overlay
+// because I am too lazy to move it to some common library
+////////////////////////////////////////////////////////////////////////
+
+#include <linux/perf_event.h>
+
+enum drm_i915_gem_engine_class {
+       I915_ENGINE_CLASS_OTHER = 0,
+       I915_ENGINE_CLASS_RENDER = 1,
+       I915_ENGINE_CLASS_COPY = 2,
+       I915_ENGINE_CLASS_VIDEO = 3,
+       I915_ENGINE_CLASS_VIDEO_ENHANCE = 4,
+       I915_ENGINE_CLASS_MAX /* non-ABI */
+};
+
+enum drm_i915_pmu_engine_sample {
+       I915_SAMPLE_QUEUED = 0,
+       I915_SAMPLE_BUSY = 1,
+       I915_SAMPLE_WAIT = 2,
+       I915_SAMPLE_SEMA = 3
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+       (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+       ((class) << I915_PMU_CLASS_SHIFT | \
+       (instance) << I915_PMU_SAMPLE_BITS | \
+       (sample))
+
+#define I915_PMU_ENGINE_QUEUED(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+       __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY      __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
+#define I915_PMU_ENERGY                        __I915_PMU_OTHER(2)
+#define I915_PMU_INTERRUPTS            __I915_PMU_OTHER(3)
+
+#define I915_PMU_RC6_RESIDENCY         __I915_PMU_OTHER(4)
+#define I915_PMU_RC6p_RESIDENCY                __I915_PMU_OTHER(5)
+#define I915_PMU_RC6pp_RESIDENCY       __I915_PMU_OTHER(6)
+
+static inline int
+perf_event_open(struct perf_event_attr *attr,
+               pid_t pid,
+               int cpu,
+               int group_fd,
+               unsigned long flags)
+{
+#ifndef __NR_perf_event_open
+#if defined(__i386__)
+#define __NR_perf_event_open 336
+#elif defined(__x86_64__)
+#define __NR_perf_event_open 298
+#else
+#define __NR_perf_event_open 0
+#endif
+#endif
+    attr->size = sizeof(*attr);
+    return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static uint64_t i915_type_id(void)
+{
+       char buf[1024];
+       int fd, n;
+
+       fd = open("/sys/bus/event_source/devices/i915/type", 0);
+       if (fd < 0) {
+               n = -1;
+       } else {
+               n = read(fd, buf, sizeof(buf)-1);
+               close(fd);
+       }
+       if (n < 0)
+               return 0;
+
+       buf[n] = '\0';
+       return strtoull(buf, 0, 0);
+}
+
+////////////////////////////////////////////////////////////////////////
+
+static double elapsed(const struct timespec *start, const struct timespec *end)
+{
+       return ((end->tv_sec - start->tv_sec) +
+               (end->tv_nsec - start->tv_nsec)*1e-9);
+}
+
+static uint64_t elapsed_ns(const struct timespec *start, const struct timespec *end)
+{
+       return ((end->tv_sec - start->tv_sec)*1e9 +
+               (end->tv_nsec - start->tv_nsec));
+}
+
+static void nop_on_ring(int fd, uint32_t handle, unsigned ring_id, int timeout)
+{
+       struct drm_i915_gem_execbuffer2 execbuf;
+       struct drm_i915_gem_exec_object2 obj;
+       struct timespec start, now;
+
+       gem_require_ring(fd, ring_id);
+
+       memset(&obj, 0, sizeof(obj));
+       obj.handle = handle;
+
+       memset(&execbuf, 0, sizeof(execbuf));
+       execbuf.buffers_ptr = to_user_pointer(&obj);
+       execbuf.buffer_count = 1;
+       execbuf.flags = ring_id;
+       execbuf.flags |= LOCAL_I915_EXEC_HANDLE_LUT;
+       execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC;
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       if (__gem_execbuf(fd, &execbuf)) {
+               execbuf.flags = ring_id;
+               gem_execbuf(fd, &execbuf);
+       }
+
+       do {
+               for (int loop = 0; loop < 1024; loop++) {
+                       gem_execbuf(fd, &execbuf);
+               }
+               clock_gettime(CLOCK_MONOTONIC, &now);
+       } while (elapsed(&start, &now) < timeout);
+       gem_sync(fd, handle);
+}
+
+static int perf_i915_open(int config, int group, int read_format)
+{
+       struct perf_event_attr attr;
+
+       memset(&attr, 0, sizeof (attr));
+
+       attr.type = i915_type_id();
+       if (attr.type == 0)
+               return -ENOENT;
+       attr.config = config;
+
+       attr.read_format = read_format;
+       if (group != -1)
+               attr.read_format &= ~PERF_FORMAT_GROUP;
+
+       return perf_event_open(&attr, -1, 0, group, 0);
+}
+
+struct metric {
+       int config;
+       uint64_t old_value;
+       uint64_t value;
+};
+
+struct pmu_metrics {
+       int fd;
+       int read_format;
+       int num_metrics;
+       struct metric* metrics;
+};
+
+/* Open each config into one perf event group, skipping configs that fail to
+ * open. Returns 0, or -1 if the metrics array cannot be allocated. */
+static int perf_init(struct pmu_metrics *pm, int num_configs, int* configs)
+{
+       int i, res;
+
+       memset(pm, 0, sizeof(*pm));
+       pm->fd = -1;
+       pm->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP;
+       pm->metrics = calloc(num_configs, sizeof(*pm->metrics));
+       if (!pm->metrics)
+               return -1;
+
+       for (i = 0; i < num_configs; ++i) {
+               /* pm->fd is -1 (new group) until the first open succeeds. */
+               res = perf_i915_open(configs[i], pm->fd, pm->read_format);
+               if (res < 0)
+                       continue;
+               if (pm->fd < 0)
+                       pm->fd = res;
+               pm->metrics[pm->num_metrics++].config = configs[i];
+       }
+
+       igt_info("perf_init: enabled %d metrics from %d requested\n",
+               pm->num_metrics, num_configs);
+
+       return 0;
+}
+
+static void perf_close(struct pmu_metrics *pm)
+{
+       if (pm->fd != -1 ) { close(pm->fd); pm->fd = -1; }
+       if (pm->metrics) { free(pm->metrics); pm->metrics= NULL; }
+}
+
+/* see 'man 2 perf_event_open' */
+struct perf_read_format {
+       uint64_t nr_values;     /* The number of events */
+       uint64_t time_enabled;  /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+       struct {
+               uint64_t value;     /* The value of the event */
+       } values[1024];
+};
+
+/* Read all counters of the event group with one read() and move each metric's
+ * previous value into old_value. Returns 0 on success, -1 on failure. */
+static int perf_read(struct pmu_metrics *pm)
+{
+       const int read_format =
+               PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP;
+       struct perf_read_format data;
+       ssize_t len;
+       int i;
+
+       if (pm->fd < 0 || pm->read_format != read_format)
+               return -1;
+
+       /* Reject short reads: they would leave nr_values or values[] unset. */
+       len = read(pm->fd, &data, sizeof(data));
+       if (len < (ssize_t)(sizeof(data) - sizeof(data.values)))
+               return -1;
+
+       if (data.nr_values != (uint64_t)pm->num_metrics ||
+           len < (ssize_t)(sizeof(data) - sizeof(data.values) +
+                           pm->num_metrics * sizeof(data.values[0])))
+               return -1;
+
+       for (i = 0; i < pm->num_metrics; ++i) {
+               pm->metrics[i].old_value = pm->metrics[i].value;
+               pm->metrics[i].value = data.values[i].value;
+       }
+
+       return 0;
+}
+
+static const char* perf_get_metric_name(int config)
+{
+       switch (config) {
+               case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0):
+                       return "i915/rcs0-busy/";
+               case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0):
+                       return "i915/vcs0-busy/";
+               case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1):
+                       return "i915/vcs1-busy/";
+               case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0):
+                       return "i915/bcs0-busy/";
+               case I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0):
+                       return "i915/vecs0-busy/";
+               default:
+                       return "i915/unknown/";
+       }
+}
+
+static uint64_t perf_elapsed(struct metric* m)
+{
+       return m->value - m->old_value;
+}
+
+/* Opens the per-engine (instance 0) and global metrics listed below in one
+ * group, then checks that all of them were enabled and that a read works. */
+static void test_init(void)
+{
+       struct pmu_metrics pm;
+       unsigned int class[] =
+       {
+               I915_ENGINE_CLASS_RENDER,
+               I915_ENGINE_CLASS_VIDEO,
+               I915_ENGINE_CLASS_VIDEO,
+               I915_ENGINE_CLASS_COPY,
+               I915_ENGINE_CLASS_VIDEO_ENHANCE,
+       };
+       int configs[1024]; /* stack storage, so there is nothing to free */
+       int num_configs = 0;
+
+       for (int i = 0; i < sizeof(class)/sizeof(class[0]); ++i) {
+               /* TODO Adding metrics for 0-instances only. Would be nice
+                * to get everything, but for that we either need to add
+                * check for different platforms here or use upcoming
+                * engines discover API.
+                */
+               configs[num_configs++] = I915_PMU_ENGINE_BUSY(class[i], 0);
+               configs[num_configs++] = I915_PMU_ENGINE_QUEUED(class[i], 0);
+               configs[num_configs++] = I915_PMU_ENGINE_WAIT(class[i], 0);
+               configs[num_configs++] = I915_PMU_ENGINE_SEMA(class[i], 0);
+       }
+       configs[num_configs++] = I915_PMU_ACTUAL_FREQUENCY;
+       configs[num_configs++] = I915_PMU_REQUESTED_FREQUENCY;
+       configs[num_configs++] = I915_PMU_ENERGY;
+       configs[num_configs++] = I915_PMU_RC6_RESIDENCY;
+       configs[num_configs++] = I915_PMU_RC6p_RESIDENCY;
+       configs[num_configs++] = I915_PMU_RC6pp_RESIDENCY;
+
+       igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+       igt_assert_eq(perf_read(&pm), 0);
+       igt_assert_eq(pm.num_metrics, num_configs);
+
+       perf_close(&pm);
+}
+
+/* Tests that i915 PMU correctly errors out on invalid initialization.
+ * i915 PMU is uncore PMU, thus:
+ *  - sampling period is not supported
+ *  - pid != -1 is not supported since we can't count per-process (we count
+ *    for the whole system)
+ *  - cpu != 0 is not supported since i915 PMU exposes cpumask for CPU0
+ */
+static void test_invalid_init(void)
+{
+       struct perf_event_attr attr;
+       int pid, cpu;
+
+#define ATTR_INIT() \
+       do { \
+               memset(&attr, 0, sizeof (attr)); \
+               attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0); \
+               attr.type = i915_type_id(); \
+               igt_assert(attr.type != 0); \
+       } while(0)
+
+       ATTR_INIT();
+       attr.sample_period = 100;
+       pid = -1;
+       cpu = 0;
+       igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+       igt_assert_eq(errno, EINVAL);
+
+       ATTR_INIT();
+       pid = 0;
+       cpu = 0;
+       igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+       igt_assert_eq(errno, EINVAL);
+
+       ATTR_INIT();
+       pid = -1;
+       cpu = 1;
+       igt_assert_eq(perf_event_open(&attr, pid, cpu, -1, 0), -1);
+       igt_assert_eq(errno, ENODEV);
+}
+
+/* Loads each engine in turn and checks every BUSY counter over that interval.
+ * Returns the number of engines tried. */
+static int test_single(int fd, uint32_t handle)
+{
+       struct {
+               const char* engine_name;
+               unsigned int class;
+               unsigned int instance;
+               unsigned int ring_id;
+       } engines[] = {
+               { "rcs0", I915_ENGINE_CLASS_RENDER, 0, I915_EXEC_RENDER },
+               { "vcs0", I915_ENGINE_CLASS_VIDEO, 0, I915_EXEC_BSD | I915_EXEC_BSD_RING1 },
+               { "vcs1", I915_ENGINE_CLASS_VIDEO, 1, I915_EXEC_BSD | I915_EXEC_BSD_RING2 },
+               { "bcs0", I915_ENGINE_CLASS_COPY, 0, I915_EXEC_BLT },
+               { "vecs0", I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, I915_EXEC_VEBOX },
+       };
+       struct pmu_metrics pm;
+       int configs[] = {
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+       };
+       int num_configs = sizeof(configs)/sizeof(configs[0]);
+       struct timespec start, now;
+
+       igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+       igt_assert_eq(pm.num_metrics, num_configs);
+
+       for (int i = 0; i < sizeof(engines)/sizeof(engines[0]); ++i) {
+               clock_gettime(CLOCK_MONOTONIC, &start);
+               igt_assert_eq(perf_read(&pm), 0);
+
+               /* Create almost 100% load on the examined engine for specified time. */
+               nop_on_ring(fd, handle, engines[i].ring_id, 20);
+
+               igt_assert_eq(perf_read(&pm), 0);
+               clock_gettime(CLOCK_MONOTONIC, &now);
+
+               igt_info("Executed on %s for %" PRIu64 "ns\n", engines[i].engine_name, elapsed_ns(&start, &now));
+               for (int j = 0; j < num_configs; ++j) {
+                       igt_info("  %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+                       igt_assert(perf_elapsed(&pm.metrics[j]) < elapsed_ns(&start, &now));
+
+                       if (configs[j] == I915_PMU_ENGINE_BUSY(engines[i].class, engines[i].instance)) {
+                               /* Check that the loaded engine had almost 100% load, we will have 1% tolerance. */
+                               igt_assert(perf_elapsed(&pm.metrics[j]) > 0.99 * elapsed_ns(&start, &now));
+                       } else if (configs[j] == I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0)) {
+                               /* Check that BCS engine had just a tiny load (1% tolerance). Some load on BCS
+                                * is unavoidable if you run under any graphical server, so we can't check for zero.
+                                */
+                               igt_assert(perf_elapsed(&pm.metrics[j]) < 0.01 * elapsed_ns(&start, &now));
+                       } else {
+                               /* Check that other engines did not have any load.
+                                * NOTE This may fail if you have any other workload running in parallel to this test.
+                                */
+                               igt_assert_eq(perf_elapsed(&pm.metrics[j]), 0);
+                       }
+               }
+       }
+       perf_close(&pm);
+
+       /* Return how many engines we have tried. */
+       return sizeof(engines)/sizeof(engines[0]);
+}
+
+/* Checks that a second event group keeps counting across a test_single() run. */
+static void test_parallel(int fd, uint32_t handle)
+{
+       struct pmu_metrics pm;
+       int configs[] = {
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO, 1),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_COPY, 0),
+               I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_VIDEO_ENHANCE, 0),
+       };
+       int num_configs = sizeof(configs)/sizeof(configs[0]);
+       int num_engines;
+       struct timespec start, now;
+
+       igt_assert_eq(perf_init(&pm, num_configs, configs), 0);
+       igt_assert_eq(pm.num_metrics, num_configs);
+
+       clock_gettime(CLOCK_MONOTONIC, &start);
+       igt_assert_eq(perf_read(&pm), 0);
+
+       /* Create almost 100% load on the engines one by one, we will get back
+        * how many engines were tried.
+        */
+       num_engines = test_single(fd, handle);
+
+       igt_assert_eq(perf_read(&pm), 0);
+       clock_gettime(CLOCK_MONOTONIC, &now);
+
+       igt_info("Executed on %d engines for %" PRIu64 "ns\n", num_engines, elapsed_ns(&start, &now));
+       for (int j = 0; j < num_configs; ++j) {
+               igt_info("  %s: %" PRIu64 "ns\n", perf_get_metric_name(pm.metrics[j].config), perf_elapsed(&pm.metrics[j]));
+
+               /* Since engines were loaded in turn, one by one, for roughly the same time,
+                * each should have produced roughly the same load, proportional to the number of engines.
+                */
+               igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines > 0.99 * elapsed_ns(&start, &now));
+               igt_assert(perf_elapsed(&pm.metrics[j]) * num_engines < 1.01 * elapsed_ns(&start, &now));
+       }
+       perf_close(&pm);
+}
+
+igt_main
+{
+       uint32_t handle = 0;
+       int device = -1;
+
+       igt_fixture {
+               const uint32_t bbe = MI_BATCH_BUFFER_END;
+
+               device = drm_open_driver(DRIVER_INTEL);
+               igt_require_gem(device);
+
+               handle = gem_create(device, 4096);
+               gem_write(device, handle, 0, &bbe, sizeof(bbe));
+
+               igt_fork_hang_detector(device);
+       }
+
+       /* Test that we can initialize all the metrics. */
+       igt_subtest_f("init")
+               test_init();
+
+       /* Test that invalid initialization attempts are rejected. */
+       igt_subtest_f("invalid_init")
+               test_invalid_init();
+
+       /* Test single metrics consumer. */
+       igt_subtest_f("single")
+               test_single(device, handle);
+
+       /* Test parallel metrics consumers. */
+       igt_subtest_f("parallel")
+               test_parallel(device, handle);
+
+       igt_fixture {
+               igt_stop_hang_detector();
+               gem_close(device, handle);
+               close(device);
+       }
+}
-- 
1.8.3.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to