Generalized from auto-tuned GPU dummy workload in gem_wait and kms_flip

Signed-off-by: Abdiel Janulgue <abdiel.janul...@linux.intel.com>
---
 lib/Makefile.sources |   2 +
 lib/igt.h            |   1 +
 lib/igt_dummyload.c  | 419 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/igt_dummyload.h  |  63 ++++++++
 4 files changed, 485 insertions(+)
 create mode 100644 lib/igt_dummyload.c
 create mode 100644 lib/igt_dummyload.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index e8e277b..7fc5ec2 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -75,6 +75,8 @@ lib_source_list =             \
        igt_draw.h              \
        igt_pm.c                \
        igt_pm.h                \
+       igt_dummyload.c         \
+       igt_dummyload.h         \
        uwildmat/uwildmat.h     \
        uwildmat/uwildmat.c     \
        $(NULL)
diff --git a/lib/igt.h b/lib/igt.h
index d751f24..a0028d5 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -32,6 +32,7 @@
 #include "igt_core.h"
 #include "igt_debugfs.h"
 #include "igt_draw.h"
+#include "igt_dummyload.h"
 #include "igt_fb.h"
 #include "igt_gt.h"
 #include "igt_kms.h"
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
new file mode 100644
index 0000000..908d839
--- /dev/null
+++ b/lib/igt_dummyload.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+#include <time.h>
+
+/**
+ * SECTION:igt_dummyload
+ * @short_description: Library for submitting auto-tuned dummy GPU workloads
+ * @title: Dummyload
+ * @include: igt.h
+ *
+ * A lot of igt testcases need some dummy load to make sure a race window is
+ * big enough. Unfortunately having a fixed amount of workload leads to
+ * spurious test failures or overly long runtimes on some fast/slow platforms.
+ * This library contains functionality to submit GPU workloads that are
+ * dynamically tuned to consume a specific amount of time.
+ */
+
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+
+/* Internal data structures to avoid having to pass tons of parameters
+ * around. */
+struct dummy_info {
+       drm_intel_bufmgr *bufmgr;        /* libdrm buffer manager */
+       struct intel_batchbuffer *batch; /* batch the workload is emitted into */
+       int drm_fd;                      /* DRM file descriptor */
+       uint32_t buf_handle;             /* GEM handle of the target buffer */
+       uint32_t buf_stride;             /* stride of the target buffer */
+       uint32_t buf_tiling;             /* tiling mode of the target buffer */
+       int fb_width;                    /* width of the rectangle the load touches */
+       int fb_height;                   /* height of the rectangle the load touches */
+};
+
+/* Emit a @width x @height 32bpp blitter copy from @src into @dst.  On gen6+
+ * an XY_SETUP_CLIP_BLT is emitted afterwards to reset the clipping state
+ * again. */
+static void blit_copy(struct intel_batchbuffer *batch,
+                     drm_intel_bo *dst, drm_intel_bo *src,
+                     unsigned int width, unsigned int height,
+                     unsigned int dst_pitch, unsigned int src_pitch)
+{
+       BLIT_COPY_BATCH_START(0);
+       OUT_BATCH((3 << 24) | /* 32 bits */
+                 (0xcc << 16) | /* copy ROP */
+                 dst_pitch);
+       OUT_BATCH(0 << 16 | 0); /* dst (y1 << 16 | x1) */
+       OUT_BATCH(height << 16 | width); /* dst (y2 << 16 | x2) */
+       OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 
0);
+       OUT_BATCH(0 << 16 | 0); /* src (y1 << 16 | x1) */
+       OUT_BATCH(src_pitch);
+       OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+       ADVANCE_BATCH();
+
+       if (batch->gen >= 6) {
+               BEGIN_BATCH(3, 0);
+               OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+               OUT_BATCH(0);
+               OUT_BATCH(0);
+               ADVANCE_BATCH();
+       }
+}
+
+/* Fill the top-left @width x @height pixels of @dst with a random colour
+ * using the blitter; used as a cheap single-buffer dummy workload.
+ * NOTE(review): the destination pitch is emitted as 0, so every row lands on
+ * the same scanline — harmless for a dummy load, but confirm if real fill
+ * coverage is wanted. */
+static void blit_fill(struct intel_batchbuffer *batch, drm_intel_bo *dst,
+                     unsigned int width, unsigned int height)
+{
+       COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA |
+                                   XY_COLOR_BLT_WRITE_RGB);
+       OUT_BATCH((3 << 24)     | /* 32 Bit Color */
+                 (0xF0 << 16)  | /* Raster OP copy background register */
+                 0);             /* Dest pitch is 0 */
+       OUT_BATCH(0); /* dst (y1 << 16 | x1) */
+       /* XY_COLOR_BLT takes (y2 << 16 | x2): height in the high word, width
+        * in the low word — the original emitted them swapped, unlike
+        * blit_copy() above. */
+       OUT_BATCH(height << 16 |
+                 width);
+       OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+       OUT_BATCH(rand()); /* random pattern */
+       ADVANCE_BATCH();
+}
+
+/* Ping-pong @limit blitter copies between two 2048x2048 scratch buffers,
+ * then blit the result into the buffer identified by d->buf_handle so the
+ * work is visible there.  If @timeout > 0, wait up to @timeout seconds for
+ * the final blit to finish.  Returns 0 on success or the
+ * drm_intel_gem_bo_wait() error. */
+static int emit_dummy_load_blitcopy(struct dummy_info *d, int limit, int 
timeout)
+{
+       int i, ret = 0;
+       drm_intel_bo *src_bo, *dst_bo, *fb_bo;
+       struct intel_batchbuffer *batch = d->batch;
+       drm_intel_bufmgr *bufmgr = d->bufmgr;
+
+       igt_require(bufmgr);
+
+       src_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+       igt_assert(src_bo);
+
+       dst_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+       igt_assert(dst_bo);
+
+       /* Import the caller's buffer so the final copy lands in it. */
+       fb_bo = gem_handle_to_libdrm_bo(bufmgr, d->drm_fd, "imported",
+                                       d->buf_handle);
+       igt_assert(fb_bo);
+
+       for (i = 0; i < limit; i++) {
+               blit_copy(batch, dst_bo, src_bo,
+                         2048, 2048,
+                         2048*4, 2048*4);
+               igt_swap(src_bo, dst_bo);
+       }
+       /* Final copy into the target; clamp to the scratch buffer size. */
+       blit_copy(batch, fb_bo, src_bo,
+                 min(d->fb_width, 2048), min(d->fb_height, 2048),
+                 d->buf_stride, 2048*4);
+
+       intel_batchbuffer_flush(batch);
+
+       if (timeout > 0)
+               ret = drm_intel_gem_bo_wait(fb_bo, timeout * NSEC_PER_SEC);
+       drm_intel_bo_unreference(src_bo);
+       drm_intel_bo_unreference(dst_bo);
+       drm_intel_bo_unreference(fb_bo);
+
+       return ret;
+}
+
+/* Emit @limit blitter fills into the buffer identified by d->buf_handle.
+ * If @timeout > 0, wait up to @timeout seconds for the work to complete.
+ * Returns 0 on success or the drm_intel_gem_bo_wait() error. */
+static int emit_dummy_load_blitfill(struct dummy_info *d, int limit,
+                                   int timeout)
+{
+       int i, ret = 0;
+       struct intel_batchbuffer *batch = d->batch;
+       drm_intel_bufmgr *bufmgr = d->bufmgr;
+       drm_intel_bo *dst_bo;
+
+       /* Check the requirement before the first use of bufmgr; the original
+        * imported the bo in the declaration initializer, dereferencing a
+        * possibly-NULL bufmgr before igt_require() could skip the test. */
+       igt_require(bufmgr);
+
+       dst_bo = gem_handle_to_libdrm_bo(bufmgr, d->drm_fd, "", d->buf_handle);
+       igt_assert(dst_bo);
+
+       for (i = 0; i < limit; i++) {
+               blit_fill(batch, dst_bo,
+                         min(d->fb_width, dst_bo->size/2),
+                         min(d->fb_height, dst_bo->size/2));
+       }
+       intel_batchbuffer_flush(batch);
+
+       if (timeout > 0)
+               ret = drm_intel_gem_bo_wait(dst_bo, timeout * NSEC_PER_SEC);
+       drm_intel_bo_unreference(dst_bo);
+
+       return ret;
+}
+
+/* Ping-pong @limit render-engine copies between two scratch buffers, then
+ * copy the result into the buffer identified by d->buf_handle.  Falls back
+ * to a blitter fill when no render copy implementation exists for the
+ * device.  If @timeout > 0, wait up to @timeout seconds for the final copy
+ * to finish.  Returns 0 on success or the drm_intel_gem_bo_wait() error. */
+static int emit_dummy_load_rendercopy(struct dummy_info *d, int limit, int 
timeout)
+{
+       struct intel_batchbuffer *batch = d->batch;
+       drm_intel_bufmgr *bufmgr = d->bufmgr;
+       static uint32_t devid = 0;
+       igt_render_copyfunc_t copyfunc;
+       struct igt_buf sb[3], *src, *dst, *fb;
+       int i, ret = 0;
+
+       igt_require(bufmgr);
+
+       /* The device id cannot change within a process; look it up once. */
+       if (!devid)
+               devid = intel_get_drm_devid(d->drm_fd);
+       copyfunc = igt_get_render_copyfunc(devid);
+       if (copyfunc == NULL)
+               return emit_dummy_load_blitfill(d, limit, timeout);
+
+       /* sb[0] and sb[1]: untiled 2048x2048 scratch buffers. */
+       sb[0].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+       igt_assert(sb[0].bo);
+       sb[0].size = sb[0].bo->size;
+       sb[0].tiling = I915_TILING_NONE;
+       sb[0].data = NULL;
+       sb[0].num_tiles = sb[0].bo->size;
+       sb[0].stride = 4 * 2048;
+
+       sb[1].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+       igt_assert(sb[1].bo);
+       sb[1].size = sb[1].bo->size;
+       sb[1].tiling = I915_TILING_NONE;
+       sb[1].data = NULL;
+       sb[1].num_tiles = sb[1].bo->size;
+       sb[1].stride = 4 * 2048;
+
+       /* sb[2]: the caller's buffer, imported with its real stride/tiling. */
+       sb[2].bo = gem_handle_to_libdrm_bo(bufmgr, d->drm_fd, "imported",
+                                          d->buf_handle);
+       igt_assert(sb[2].bo);
+       sb[2].size = sb[2].bo->size;
+       sb[2].tiling = d->buf_tiling;
+       sb[2].data = NULL;
+       sb[2].num_tiles = sb[2].bo->size;
+       sb[2].stride = d->buf_stride;
+
+       src = &sb[0];
+       dst = &sb[1];
+       fb = &sb[2];
+
+       for (i = 0; i < limit; i++) {
+               copyfunc(batch, NULL,
+                        src, 0, 0,
+                        2048, 2048,
+                        dst, 0, 0);
+
+               igt_swap(src, dst);
+       }
+       /* Final copy into the target; clamp to the scratch buffer size. */
+       copyfunc(batch, NULL,
+                src, 0, 0,
+                min(d->fb_width, 2048), min(d->fb_height, 2048),
+                fb, 0, 0);
+       intel_batchbuffer_flush(batch);
+
+       if (timeout > 0)
+               ret = drm_intel_gem_bo_wait(fb->bo, timeout * NSEC_PER_SEC);
+       drm_intel_bo_unreference(sb[0].bo);
+       drm_intel_bo_unreference(sb[1].bo);
+       drm_intel_bo_unreference(sb[2].bo);
+
+       return ret;
+}
+
+/* Monotonic timestamp in microseconds; use the USEC_PER_SEC constant
+ * defined above instead of repeating the magic 1000000. */
+static unsigned long gettime_us(void)
+{
+       struct timespec ts;
+
+       clock_gettime(CLOCK_MONOTONIC, &ts);
+
+       return ts.tv_sec * USEC_PER_SEC + ts.tv_nsec / 1000;
+}
+
+/* Double the operation count per attempt until one @emit call either fails
+ * or takes longer than @enough_work_in_seconds, then return that count.
+ * @emit is one of the emit_dummy_load_* helpers; @ring_name is only used
+ * for the debug message. */
+static int calibrate_dummy_load(struct dummy_info *d,
+                               const char *ring_name,
+                               int enough_work_in_seconds,
+                               int (*emit)(struct dummy_info *d, int limit,
+                                           int timeout))
+{
+       unsigned long start;
+       int ops = 1;
+
+       do {
+               unsigned long diff;
+               int ret;
+
+               start = gettime_us();
+               ret = emit(d, ops, 10);
+               diff = gettime_us() - start;
+
+               if (ret || diff / USEC_PER_SEC > enough_work_in_seconds)
+                       break;
+               ops += ops;
+       } while (ops < 100000);
+
+       /* The original message claimed "operations / second", but ops is the
+        * count for the whole calibration window. */
+       igt_debug("%s dummy load calibrated: %d operations / %d s\n",
+                 ring_name, ops, enough_work_in_seconds);
+
+       return ops;
+}
+
+/* Submit roughly @seconds seconds of blitter-copy load.  The calibration
+ * result is cached for the lifetime of the process, which assumes
+ * comparable dummy_info parameters across calls. */
+static void igt_dummy_load_blitcopy(struct dummy_info* d, int seconds)
+{
+       static int ops_per_sec = 0;
+
+       /* Calibrate against one second so ops_per_sec really is a per-second
+        * rate; the original calibrated for @seconds and then also multiplied
+        * by @seconds below, submitting ~seconds^2 worth of work. */
+       if (ops_per_sec == 0)
+               ops_per_sec = calibrate_dummy_load(d, "bcs", 1,
+                                                  emit_dummy_load_blitcopy);
+
+       emit_dummy_load_blitcopy(d, seconds * ops_per_sec, 0);
+}
+
+/* Submit roughly @seconds seconds of blitter-fill load.  The calibration
+ * result is cached for the lifetime of the process, which assumes
+ * comparable dummy_info parameters across calls. */
+static void igt_dummy_load_blitfill(struct dummy_info* d, int seconds)
+{
+       static int ops_per_sec = 0;
+
+       /* Calibrate against one second so ops_per_sec really is a per-second
+        * rate; the original calibrated for @seconds and then also multiplied
+        * by @seconds below, submitting ~seconds^2 worth of work. */
+       if (ops_per_sec == 0)
+               ops_per_sec = calibrate_dummy_load(d, "bcs", 1,
+                                                  emit_dummy_load_blitfill);
+
+       emit_dummy_load_blitfill(d, seconds * ops_per_sec, 0);
+}
+
+/* Submit roughly @seconds seconds of render-copy load.  The calibration
+ * result is cached for the lifetime of the process, which assumes
+ * comparable dummy_info parameters across calls. */
+static void igt_dummy_load_rendercopy(struct dummy_info* d, int seconds)
+{
+       static int ops_per_sec = 0;
+
+       /* Calibrate against one second so ops_per_sec really is a per-second
+        * rate; the original calibrated for @seconds and then also multiplied
+        * by @seconds below, submitting ~seconds^2 worth of work. */
+       if (ops_per_sec == 0)
+               ops_per_sec = calibrate_dummy_load(d, "rcs", 1,
+                                                  emit_dummy_load_rendercopy);
+
+       emit_dummy_load_rendercopy(d, seconds * ops_per_sec, 0);
+}
+
+/**
+ * igt_calibrate_dummy_load:
+ * @bufmgr: the libdrm bufmgr
+ * @batch: the batchbuffer
+ * @drm_fd: the DRM file descriptor
+ * @buf_handle: handle of the destination buffer where the operation is
+ *              applied. For IGT_DUMMY_BLIT_COPY and IGT_DUMMY_RENDER_COPY
+ *              this is the destination buffer where final results are
+ *              copied into
+ * @buf_stride: the stride of the buffer, ignored by IGT_DUMMY_BLIT_FILL
+ * @fb_width: width of the rectangle
+ * @fb_height: height of the rectangle
+ * @enough_work_in_seconds: target duration of the GPU workload in seconds
+ * @method: type of GPU workload
+ *
+ * Determines how many operations of the given workload type fit into
+ * @enough_work_in_seconds seconds of GPU time.
+ *
+ * Returns: the calibrated number of operations.
+ */
+int igt_calibrate_dummy_load(drm_intel_bufmgr *bufmgr,
+                            struct intel_batchbuffer *batch,
+                            int drm_fd,
+                            uint32_t buf_handle,
+                            uint32_t buf_stride,
+                            int fb_width,
+                            int fb_height,
+                            int enough_work_in_seconds,
+                            enum igt_dummy_load_method method)
+{
+       struct dummy_info dummy_info = {
+               .bufmgr = bufmgr,
+               .batch = batch,
+               .drm_fd = drm_fd,
+               .buf_handle = buf_handle,
+               .buf_stride = buf_stride,
+               .fb_width = fb_width,
+               .fb_height = fb_height,
+       };
+
+       switch (method) {
+       case IGT_DUMMY_RENDER_COPY:
+               return calibrate_dummy_load(&dummy_info, "rcs",
+                                           enough_work_in_seconds,
+                                           emit_dummy_load_rendercopy);
+       case IGT_DUMMY_BLIT_COPY:
+               return calibrate_dummy_load(&dummy_info, "bcs",
+                                           enough_work_in_seconds,
+                                           emit_dummy_load_blitcopy);
+       case IGT_DUMMY_BLIT_FILL:
+               return calibrate_dummy_load(&dummy_info, "bcs",
+                                           enough_work_in_seconds,
+                                           emit_dummy_load_blitfill);
+       default:
+               /* NOTE(review): control would fall off the end of this
+                * non-void function if igt_assert() ever returned; some
+                * compilers warn about a missing return here. */
+               igt_assert(false);
+       }
+}
+
+/**
+ * igt_emit_dummy_load:
+ * @bufmgr: the libdrm bufmgr
+ * @batch: the batchbuffer
+ * @drm_fd: the DRM file descriptor
+ * @buf_handle: handle of the destination buffer where the operation is
+ *              applied. For IGT_DUMMY_BLIT_COPY and IGT_DUMMY_RENDER_COPY
+ *              this is the destination buffer where final results are
+ *              copied into
+ * @buf_stride: the stride of the buffer, ignored by IGT_DUMMY_BLIT_FILL
+ * @fb_width: width of the rectangle
+ * @fb_height: height of the rectangle
+ * @iterations: manually specify the amount of operations that the dummy load
+ *              executes. If less than 1, automatically determine the amount
+ *              of iterations it takes to execute @enough_work_in_seconds
+ *              seconds of GPU workload
+ * @enough_work_in_seconds: time it takes to execute a GPU workload. Ignored
+ *              when auto-calibration is disabled (@iterations >= 1)
+ * @method: Type of GPU workload
+ *
+ * This function submits a dummy workload to the GPU, optionally auto-tuning
+ * it so the workload consumes a specific amount of time.
+ */
+void igt_emit_dummy_load(drm_intel_bufmgr *bufmgr,
+                        struct intel_batchbuffer *batch,
+                        int drm_fd,
+                        uint32_t buf_handle,
+                        uint32_t buf_stride,
+                        int fb_width,
+                        int fb_height,
+                        int iterations,
+                        int enough_work_in_seconds,
+                        enum igt_dummy_load_method method)
+{
+       struct dummy_info dummy_info = {
+               .bufmgr = bufmgr,
+               .batch = batch,
+               .drm_fd = drm_fd,
+               .buf_handle = buf_handle,
+               .buf_stride = buf_stride,
+               .fb_width = fb_width,
+               .fb_height = fb_height,
+       };
+
+       /* Plain if/else instead of the original ?: expressions: those mixed
+        * an int-returning and a void-returning call in one conditional
+        * operator, which violates the C constraints for ?: and abused it as
+        * control flow. */
+       switch (method) {
+       case IGT_DUMMY_RENDER_COPY:
+               if (iterations > 0)
+                       emit_dummy_load_rendercopy(&dummy_info, iterations, 0);
+               else
+                       igt_dummy_load_rendercopy(&dummy_info,
+                                                 enough_work_in_seconds);
+               break;
+       case IGT_DUMMY_BLIT_COPY:
+               if (iterations > 0)
+                       emit_dummy_load_blitcopy(&dummy_info, iterations, 0);
+               else
+                       igt_dummy_load_blitcopy(&dummy_info,
+                                               enough_work_in_seconds);
+               break;
+       case IGT_DUMMY_BLIT_FILL:
+               if (iterations > 0)
+                       emit_dummy_load_blitfill(&dummy_info, iterations, 0);
+               else
+                       igt_dummy_load_blitfill(&dummy_info,
+                                               enough_work_in_seconds);
+               break;
+       default:
+               igt_assert(false);
+               break;
+       }
+}
diff --git a/lib/igt_dummyload.h b/lib/igt_dummyload.h
new file mode 100644
index 0000000..9e40fd1
--- /dev/null
+++ b/lib/igt_dummyload.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __IGT_DUMMYLOAD_H__
+#define __IGT_DUMMYLOAD_H__
+
+/**
+ * igt_dummy_load_method:
+ * @IGT_DUMMY_BLIT_FILL: Use blitter engine to fill a buffer with random color
+ * @IGT_DUMMY_BLIT_COPY: Use blitter engine to copy between buffers
+ * @IGT_DUMMY_RENDER_COPY: Use render engine to copy between buffers (falls
+ *                         back to a blitter fill on platforms without a
+ *                         render copy implementation)
+ *
+ * Method to generate a GPU dummy load
+ */
+enum igt_dummy_load_method {
+       IGT_DUMMY_BLIT_FILL,
+       IGT_DUMMY_BLIT_COPY,
+       IGT_DUMMY_RENDER_COPY,
+};
+
+/* Returns the number of operations of @method that fit into
+ * @enough_work_in_seconds seconds of GPU time; see igt_dummyload.c for the
+ * full documentation.
+ * NOTE(review): this header uses uint32_t and libdrm/intel types without
+ * including any headers itself; it relies on being included via igt.h in
+ * the right order — confirm or make it self-contained. */
+int igt_calibrate_dummy_load(drm_intel_bufmgr *bufmgr,
+                            struct intel_batchbuffer *batch,
+                            int drm_fd,
+                            uint32_t buf_handle,
+                            uint32_t buf_stride,
+                            int fb_width,
+                            int fb_height,
+                            int enough_work_in_seconds,
+                            enum igt_dummy_load_method method);
+
+/* Submits a dummy GPU workload, auto-calibrated to take
+ * @enough_work_in_seconds unless @iterations >= 1; see igt_dummyload.c for
+ * the full documentation. */
+void igt_emit_dummy_load(drm_intel_bufmgr *bufmgr,
+                        struct intel_batchbuffer *batch,
+                        int drm_fd,
+                        uint32_t buf_handle,
+                        uint32_t buf_stride,
+                        int fb_width,
+                        int fb_height,
+                        int iterations,
+                        int enough_work_in_seconds,
+                        enum igt_dummy_load_method method);
+
+#endif /* __IGT_DUMMYLOAD_H__ */
-- 
2.7.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to