[Intel-gfx] [PATCH i-g-t 4/7] i915/gem_exec_balancer: Test parallel execbuf

2021-07-27 Thread Matthew Brost
Add a basic parallel execbuf submission test which more or less just
submits the same BB in a loop, where the BB does an atomic increment to a
memory location. The memory location is checked at the end for the correct
value. Different sections use various IOCTL options (e.g. fences,
location of BBs, etc.).

In addition to the above sections, an additional section ensures the
ordering of parallel submission by submitting a spinning batch to one
individual engine, submitting a parallel execbuf to all engine instances
within the class, verifying that none of the parallel execbuf batches make
it to the hardware, releasing the spinner, and finally verifying that
everything has completed.

Signed-off-by: Matthew Brost 
---
 lib/intel_reg.h|   5 +
 tests/i915/gem_exec_balancer.c | 487 +
 2 files changed, 492 insertions(+)

diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index ac1fc6cbc..146ac76c9 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2593,6 +2593,11 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define STATE3D_COLOR_FACTOR   ((0x3<<29)|(0x1d<<24)|(0x01<<16))
 
+/* Atomics */
+#define MI_ATOMIC  ((0x2f << 23) | 2)
+#define   MI_ATOMIC_INLINE_DATA (1 << 18)
+#define   MI_ATOMIC_ADD (0x7 << 8)
+
 /* Batch */
 #define MI_BATCH_BUFFER((0x30 << 23) | 1)
 #define MI_BATCH_BUFFER_START  (0x31 << 23)
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 2f98950bb..053f1d1f7 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "i915/gem.h"
 #include "i915/gem_create.h"
@@ -56,6 +57,31 @@ static size_t sizeof_load_balance(int count)
 
 #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
 
+static int
+__i915_query(int fd, struct drm_i915_query *q) /* Issue DRM_IOCTL_I915_QUERY on fd with q; returns 0 on success or -errno on failure. */
+{
+   if (igt_ioctl(fd, DRM_IOCTL_I915_QUERY, q))
+   return -errno; /* non-zero ioctl result: report errno negated */
+
+   return 0;
+}
+
+static int
+__i915_query_items(int fd, struct drm_i915_query_item *items, uint32_t n_items) /* Wrap items/n_items in a drm_i915_query and submit it; returns 0 or -errno. */
+{
+   struct drm_i915_query q = {
+   .num_items = n_items,
+   .items_ptr = to_user_pointer(items),
+   }; /* fields not named here are zero-initialized by the designated initializer */
+
+   return __i915_query(fd, &q);
+}
+
+#define i915_query_items(fd, items, n_items) do { \
+   igt_assert_eq(__i915_query_items(fd, items, n_items), 0); \
+   errno = 0; \
+   } while (0) /* Asserting form of __i915_query_items(): the query must return 0, after which errno is reset to 0. */
+
 static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
 {
int fd;
@@ -2691,6 +2717,380 @@ static void nohangcheck(int i915)
close(params);
 }
 
+static void check_bo(int i915, uint32_t handle, unsigned int count, bool wait) /* Assert that the first dword of buffer object `handle` equals `count`. */
+{
+   uint32_t *map;
+
+   map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_READ); /* read-only CPU mapping of the first 4096 bytes */
+   if (wait)
+   gem_set_domain(i915, handle, I915_GEM_DOMAIN_CPU,
+  I915_GEM_DOMAIN_CPU); /* only when wait is set; NOTE(review): presumably blocks until pending GPU writes land - confirm */
+   igt_assert_eq(map[0], count);
+   munmap(map, 4096);
+}
+
+static struct drm_i915_query_engine_info *query_engine_info(int i915) /* Run DRM_I915_QUERY_ENGINE_INFO and return the result in a malloc'd buffer that is not freed here. */
+{
+   struct drm_i915_query_engine_info *engines;
+   struct drm_i915_query_item item;
+
+#define QUERY_SIZE 0x4000 /* fixed size of the result buffer, in bytes; macro is local to this function (see #undef below) */
+   engines = malloc(QUERY_SIZE);
+   igt_assert(engines);
+
+   memset(engines, 0, QUERY_SIZE);
+   memset(&item, 0, sizeof(item));
+   item.query_id = DRM_I915_QUERY_ENGINE_INFO;
+   item.data_ptr = to_user_pointer(engines);
+   item.length = QUERY_SIZE;
+
+   i915_query_items(i915, &item, 1); /* asserts that the query ioctl itself succeeded */
+   igt_assert(item.length >= 0); /* NOTE(review): a negative length presumably signals a per-item error - confirm against the uAPI */
+   igt_assert(item.length <= QUERY_SIZE); /* reported length must not exceed the buffer that was supplied */
+#undef QUERY_SIZE
+
+   return engines;
+}
+
+/* This function only works if siblings contains all instances of a class */
+static void logical_sort_siblings(int i915,
+ struct i915_engine_class_instance *siblings,
+ unsigned int count)
+{
+   struct i915_engine_class_instance *sorted;
+   struct drm_i915_query_engine_info *engines;
+   unsigned int i, j;
+
+   sorted = calloc(count, sizeof(*sorted));
+   igt_assert(sorted);
+
+   engines = query_engine_info(i915);
+
+   for (j = 0; j < count; ++j) {
+   for (i = 0; i < engines->num_engines; ++i) {
+   if (siblings[j].engine_class ==
+   engines->engines[i].engine.engine_class &&
+   siblings[j].engine_instance ==
+   engines->engines[i].engine.engine_instance) {
+   uint16_t logical_instance =
+   engines->engines[i].logical_instance;
+
+   igt_assert(logical_instance < count);
+   
igt_assert(!sorted[logical_instance].engine_class);
+   
igt_assert(!sorted[logical_instance].engine_instance);
+
+   sorted[logica

[Intel-gfx] [PATCH i-g-t 4/7] i915/gem_exec_balancer: Test parallel execbuf

2021-07-21 Thread Matthew Brost
Add a basic parallel execbuf submission test which more or less just
submits the same BB in a loop, where the BB does an atomic increment to a
memory location. The memory location is checked at the end for the correct
value. Different sections use various IOCTL options (e.g. fences,
location of BBs, etc.).

In addition to the above sections, an additional section ensures the
ordering of parallel submission by submitting a spinning batch to one
individual engine, submitting a parallel execbuf to all engine instances
within the class, verifying that none of the parallel execbuf batches make
it to the hardware, releasing the spinner, and finally verifying that
everything has completed.

Signed-off-by: Matthew Brost 
---
 lib/intel_reg.h|   5 +
 tests/i915/gem_exec_balancer.c | 487 +
 2 files changed, 492 insertions(+)

diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index ac1fc6cbc..146ac76c9 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2593,6 +2593,11 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #define STATE3D_COLOR_FACTOR   ((0x3<<29)|(0x1d<<24)|(0x01<<16))
 
+/* Atomics */
+#define MI_ATOMIC  ((0x2f << 23) | 2)
+#define   MI_ATOMIC_INLINE_DATA (1 << 18)
+#define   MI_ATOMIC_ADD (0x7 << 8)
+
 /* Batch */
 #define MI_BATCH_BUFFER((0x30 << 23) | 1)
 #define MI_BATCH_BUFFER_START  (0x31 << 23)
diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
index 2f98950bb..053f1d1f7 100644
--- a/tests/i915/gem_exec_balancer.c
+++ b/tests/i915/gem_exec_balancer.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "i915/gem.h"
 #include "i915/gem_create.h"
@@ -56,6 +57,31 @@ static size_t sizeof_load_balance(int count)
 
 #define alloca0(sz) ({ size_t sz__ = (sz); memset(alloca(sz__), 0, sz__); })
 
+static int
+__i915_query(int fd, struct drm_i915_query *q) /* Issue DRM_IOCTL_I915_QUERY on fd with q; returns 0 on success or -errno on failure. */
+{
+   if (igt_ioctl(fd, DRM_IOCTL_I915_QUERY, q))
+   return -errno; /* non-zero ioctl result: report errno negated */
+
+   return 0;
+}
+
+static int
+__i915_query_items(int fd, struct drm_i915_query_item *items, uint32_t n_items) /* Wrap items/n_items in a drm_i915_query and submit it; returns 0 or -errno. */
+{
+   struct drm_i915_query q = {
+   .num_items = n_items,
+   .items_ptr = to_user_pointer(items),
+   }; /* fields not named here are zero-initialized by the designated initializer */
+
+   return __i915_query(fd, &q);
+}
+
+#define i915_query_items(fd, items, n_items) do { \
+   igt_assert_eq(__i915_query_items(fd, items, n_items), 0); \
+   errno = 0; \
+   } while (0) /* Asserting form of __i915_query_items(): the query must return 0, after which errno is reset to 0. */
+
 static bool has_class_instance(int i915, uint16_t class, uint16_t instance)
 {
int fd;
@@ -2691,6 +2717,380 @@ static void nohangcheck(int i915)
close(params);
 }
 
+static void check_bo(int i915, uint32_t handle, unsigned int count, bool wait) /* Assert that the first dword of buffer object `handle` equals `count`. */
+{
+   uint32_t *map;
+
+   map = gem_mmap__cpu(i915, handle, 0, 4096, PROT_READ); /* read-only CPU mapping of the first 4096 bytes */
+   if (wait)
+   gem_set_domain(i915, handle, I915_GEM_DOMAIN_CPU,
+  I915_GEM_DOMAIN_CPU); /* only when wait is set; NOTE(review): presumably blocks until pending GPU writes land - confirm */
+   igt_assert_eq(map[0], count);
+   munmap(map, 4096);
+}
+
+static struct drm_i915_query_engine_info *query_engine_info(int i915) /* Run DRM_I915_QUERY_ENGINE_INFO and return the result in a malloc'd buffer that is not freed here. */
+{
+   struct drm_i915_query_engine_info *engines;
+   struct drm_i915_query_item item;
+
+#define QUERY_SIZE 0x4000 /* fixed size of the result buffer, in bytes; macro is local to this function (see #undef below) */
+   engines = malloc(QUERY_SIZE);
+   igt_assert(engines);
+
+   memset(engines, 0, QUERY_SIZE);
+   memset(&item, 0, sizeof(item));
+   item.query_id = DRM_I915_QUERY_ENGINE_INFO;
+   item.data_ptr = to_user_pointer(engines);
+   item.length = QUERY_SIZE;
+
+   i915_query_items(i915, &item, 1); /* asserts that the query ioctl itself succeeded */
+   igt_assert(item.length >= 0); /* NOTE(review): a negative length presumably signals a per-item error - confirm against the uAPI */
+   igt_assert(item.length <= QUERY_SIZE); /* reported length must not exceed the buffer that was supplied */
+#undef QUERY_SIZE
+
+   return engines;
+}
+
+/* This function only works if siblings contains all instances of a class */
+static void logical_sort_siblings(int i915,
+ struct i915_engine_class_instance *siblings,
+ unsigned int count)
+{
+   struct i915_engine_class_instance *sorted;
+   struct drm_i915_query_engine_info *engines;
+   unsigned int i, j;
+
+   sorted = calloc(count, sizeof(*sorted));
+   igt_assert(sorted);
+
+   engines = query_engine_info(i915);
+
+   for (j = 0; j < count; ++j) {
+   for (i = 0; i < engines->num_engines; ++i) {
+   if (siblings[j].engine_class ==
+   engines->engines[i].engine.engine_class &&
+   siblings[j].engine_instance ==
+   engines->engines[i].engine.engine_instance) {
+   uint16_t logical_instance =
+   engines->engines[i].logical_instance;
+
+   igt_assert(logical_instance < count);
+   
igt_assert(!sorted[logical_instance].engine_class);
+   
igt_assert(!sorted[logical_instance].engine_instance);
+
+   sorted[logica