Re: [Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-15 Thread Tvrtko Ursulin


On 15/05/2018 09:29, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-05-15 09:20:13)


On 14/05/2018 16:10, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-05-14 11:59:09)


On 14/05/2018 09:02, Chris Wilson wrote:

The test wrote to the same dwords from multiple contexts, assuming that
the writes would be ordered by its submission. However, as it was using
multiple contexts without a write hazard, those timelines are not
coupled and the requests may be emitted to hw in any order. So emit a
write hazard for each individual dword in the scratch (avoiding the
write hazard for the scratch as a whole) to ensure the writes do occur
in the expected order.

Signed-off-by: Chris Wilson 
---
tests/gem_ctx_thrash.c | 92 --
1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
index 2cd9cfebf..b25f95f13 100644
--- a/tests/gem_ctx_thrash.c
+++ b/tests/gem_ctx_thrash.c
@@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
{
struct drm_i915_gem_exec_object2 *obj;
struct drm_i915_gem_relocation_entry *reloc;
- unsigned engines[16];
- uint64_t size;
- uint32_t *ctx, *map, scratch;
- unsigned num_ctx;
- int fd, gen, num_engines;
+ unsigned int engines[16], num_engines, num_ctx;
+ uint32_t *ctx, *map, scratch, size;
+ int fd, gen;
#define MAX_LOOP 16

- fd = drm_open_driver_master(DRIVER_INTEL);

+ fd = drm_open_driver(DRIVER_INTEL);
igt_require_gem(fd);
- igt_require(gem_can_store_dword(fd, 0));
-
gem_require_contexts(fd);

gen = intel_gen(intel_get_drm_devid(fd));

@@ -108,54 +104,77 @@ static void single(const char *name, bool all_engines)
num_engines = 0;
if (all_engines) {
unsigned engine;
+
for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
engines[num_engines++] = engine;
if (num_engines == ARRAY_SIZE(engines))
break;
}
- } else
+ } else {
+ igt_require(gem_can_store_dword(fd, 0));
engines[num_engines++] = 0;
+ }
+ igt_require(num_engines);

num_ctx = get_num_contexts(fd, num_engines);

size = ALIGN(num_ctx * sizeof(uint32_t), 4096);

- scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
+ scratch = gem_create(fd, size);
gem_set_caching(fd, scratch, I915_CACHING_CACHED);
- obj = calloc(num_ctx, 2 * sizeof(*obj));
- reloc = calloc(num_ctx, sizeof(*reloc));
+ obj = calloc(num_ctx, 3 * sizeof(*obj));
+ reloc = calloc(num_ctx, 2 * sizeof(*reloc));

ctx = malloc(num_ctx * sizeof(uint32_t));

igt_assert(ctx);
for (unsigned n = 0; n < num_ctx; n++) {
ctx[n] = gem_context_create(fd);
- obj[2*n + 0].handle = scratch;
-
- reloc[n].target_handle = scratch;
- reloc[n].presumed_offset = 0;
- reloc[n].offset = sizeof(uint32_t);
- reloc[n].delta = n * sizeof(uint32_t);
- reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
- reloc[n].write_domain = 0; /* lies! */
+
+ obj[3*n + 0].handle = gem_create(fd, 4096);
+ reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
+ reloc[2*n + 0].presumed_offset = 0;
+ reloc[2*n + 0].offset = 4000;
+ reloc[2*n + 0].delta = 0;
+ reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ obj[3*n + 1].handle = scratch;
+ reloc[2*n + 1].target_handle = scratch;
+ reloc[2*n + 1].presumed_offset = 0;
+ reloc[2*n + 1].offset = sizeof(uint32_t);
+ reloc[2*n + 1].delta = n * sizeof(uint32_t);
+ reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 1].write_domain = 0; /* lies! */
if (gen >= 4 && gen < 8)
- reloc[n].offset += sizeof(uint32_t);
+ reloc[2*n + 1].offset += sizeof(uint32_t);

- obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);

- obj[2*n + 1].relocation_count = 1;
+ obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
+ obj[3*n + 2].relocation_count = 2;
}

map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);

- for (unsigned loop = 1; loop <= MAX_LOOP; loop <<= 1) {
- unsigned count = loop * num_ctx;
+ for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
+ const unsigned int count = loop * num_ctx;
uint32_t *all;

all = malloc(count * 

Re: [Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-15 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-05-15 09:20:13)
> 
> On 14/05/2018 16:10, Chris Wilson wrote:
> > Quoting Tvrtko Ursulin (2018-05-14 11:59:09)
> >>
> >> On 14/05/2018 09:02, Chris Wilson wrote:
> >>> The test wrote to the same dwords from multiple contexts, assuming that
> >>> the writes would be ordered by its submission. However, as it was using
> >>> multiple contexts without a write hazard, those timelines are not
> >>> coupled and the requests may be emitted to hw in any order. So emit a
> >>> write hazard for each individual dword in the scratch (avoiding the
> >>> write hazard for the scratch as a whole) to ensure the writes do occur
> >>> in the expected order.
> >>>
> >>> Signed-off-by: Chris Wilson 
> >>> ---
> >>>tests/gem_ctx_thrash.c | 92 --
> >>>1 file changed, 53 insertions(+), 39 deletions(-)
> >>>
> >>> diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
> >>> index 2cd9cfebf..b25f95f13 100644
> >>> --- a/tests/gem_ctx_thrash.c
> >>> +++ b/tests/gem_ctx_thrash.c
> >>> @@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
> >>>{
> >>>struct drm_i915_gem_exec_object2 *obj;
> >>>struct drm_i915_gem_relocation_entry *reloc;
> >>> - unsigned engines[16];
> >>> - uint64_t size;
> >>> - uint32_t *ctx, *map, scratch;
> >>> - unsigned num_ctx;
> >>> - int fd, gen, num_engines;
> >>> + unsigned int engines[16], num_engines, num_ctx;
> >>> + uint32_t *ctx, *map, scratch, size;
> >>> + int fd, gen;
> >>>#define MAX_LOOP 16
> >>>
> >>> - fd = drm_open_driver_master(DRIVER_INTEL);
> >>> + fd = drm_open_driver(DRIVER_INTEL);
> >>>igt_require_gem(fd);
> >>> - igt_require(gem_can_store_dword(fd, 0));
> >>> -
> >>>gem_require_contexts(fd);
> >>>
> >>>gen = intel_gen(intel_get_drm_devid(fd));
> >>> @@ -108,54 +104,77 @@ static void single(const char *name, bool 
> >>> all_engines)
> >>>num_engines = 0;
> >>>if (all_engines) {
> >>>unsigned engine;
> >>> +
> >>>for_each_physical_engine(fd, engine) {
> >>> + if (!gem_can_store_dword(fd, engine))
> >>> + continue;
> >>> +
> >>>engines[num_engines++] = engine;
> >>>if (num_engines == ARRAY_SIZE(engines))
> >>>break;
> >>>}
> >>> - } else
> >>> + } else {
> >>> + igt_require(gem_can_store_dword(fd, 0));
> >>>engines[num_engines++] = 0;
> >>> + }
> >>> + igt_require(num_engines);
> >>>
> >>>num_ctx = get_num_contexts(fd, num_engines);
> >>>
> >>>size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
> >>> - scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
> >>> + scratch = gem_create(fd, size);
> >>>gem_set_caching(fd, scratch, I915_CACHING_CACHED);
> >>> - obj = calloc(num_ctx, 2 * sizeof(*obj));
> >>> - reloc = calloc(num_ctx, sizeof(*reloc));
> >>> + obj = calloc(num_ctx, 3 * sizeof(*obj));
> >>> + reloc = calloc(num_ctx, 2 * sizeof(*reloc));
> >>>
> >>>ctx = malloc(num_ctx * sizeof(uint32_t));
> >>>igt_assert(ctx);
> >>>for (unsigned n = 0; n < num_ctx; n++) {
> >>>ctx[n] = gem_context_create(fd);
> >>> - obj[2*n + 0].handle = scratch;
> >>> -
> >>> - reloc[n].target_handle = scratch;
> >>> - reloc[n].presumed_offset = 0;
> >>> - reloc[n].offset = sizeof(uint32_t);
> >>> - reloc[n].delta = n * sizeof(uint32_t);
> >>> - reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> >>> - reloc[n].write_domain = 0; /* lies! */
> >>> +
> >>> + obj[3*n + 0].handle = gem_create(fd, 4096);
> >>> + reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
> >>> + reloc[2*n + 0].presumed_offset = 0;
> >>> + reloc[2*n + 0].offset = 4000;
> >>> + reloc[2*n + 0].delta = 0;
> >>> + reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
> >>> + reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
> >>> +
> >>> + obj[3*n + 1].handle = scratch;
> >>> + reloc[2*n + 1].target_handle = scratch;
> >>> + reloc[2*n + 1].presumed_offset = 0;
> >>> + reloc[2*n + 1].offset = sizeof(uint32_t);
> >>> + reloc[2*n + 1].delta = n * sizeof(uint32_t);
> >>> + reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
> >>> + reloc[2*n + 1].write_domain = 0; /* lies! */
> >>>if (gen >= 4 && gen < 8)
> >>> - reloc[n].offset += sizeof(uint32_t);
> >>> + reloc[2*n + 1].offset += sizeof(uint32_t);
> >>>
> >>> - obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);

Re: [Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-15 Thread Tvrtko Ursulin


On 14/05/2018 16:10, Chris Wilson wrote:

Quoting Tvrtko Ursulin (2018-05-14 11:59:09)


On 14/05/2018 09:02, Chris Wilson wrote:

The test wrote to the same dwords from multiple contexts, assuming that
the writes would be ordered by its submission. However, as it was using
multiple contexts without a write hazard, those timelines are not
coupled and the requests may be emitted to hw in any order. So emit a
write hazard for each individual dword in the scratch (avoiding the
write hazard for the scratch as a whole) to ensure the writes do occur
in the expected order.

Signed-off-by: Chris Wilson 
---
   tests/gem_ctx_thrash.c | 92 --
   1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
index 2cd9cfebf..b25f95f13 100644
--- a/tests/gem_ctx_thrash.c
+++ b/tests/gem_ctx_thrash.c
@@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
   {
   struct drm_i915_gem_exec_object2 *obj;
   struct drm_i915_gem_relocation_entry *reloc;
- unsigned engines[16];
- uint64_t size;
- uint32_t *ctx, *map, scratch;
- unsigned num_ctx;
- int fd, gen, num_engines;
+ unsigned int engines[16], num_engines, num_ctx;
+ uint32_t *ctx, *map, scratch, size;
+ int fd, gen;
   #define MAX_LOOP 16
   
- fd = drm_open_driver_master(DRIVER_INTEL);

+ fd = drm_open_driver(DRIVER_INTEL);
   igt_require_gem(fd);
- igt_require(gem_can_store_dword(fd, 0));
-
   gem_require_contexts(fd);
   
   gen = intel_gen(intel_get_drm_devid(fd));

@@ -108,54 +104,77 @@ static void single(const char *name, bool all_engines)
   num_engines = 0;
   if (all_engines) {
   unsigned engine;
+
   for_each_physical_engine(fd, engine) {
+ if (!gem_can_store_dword(fd, engine))
+ continue;
+
   engines[num_engines++] = engine;
   if (num_engines == ARRAY_SIZE(engines))
   break;
   }
- } else
+ } else {
+ igt_require(gem_can_store_dword(fd, 0));
   engines[num_engines++] = 0;
+ }
+ igt_require(num_engines);
   
   num_ctx = get_num_contexts(fd, num_engines);
   
   size = ALIGN(num_ctx * sizeof(uint32_t), 4096);

- scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
+ scratch = gem_create(fd, size);
   gem_set_caching(fd, scratch, I915_CACHING_CACHED);
- obj = calloc(num_ctx, 2 * sizeof(*obj));
- reloc = calloc(num_ctx, sizeof(*reloc));
+ obj = calloc(num_ctx, 3 * sizeof(*obj));
+ reloc = calloc(num_ctx, 2 * sizeof(*reloc));
   
   ctx = malloc(num_ctx * sizeof(uint32_t));

   igt_assert(ctx);
   for (unsigned n = 0; n < num_ctx; n++) {
   ctx[n] = gem_context_create(fd);
- obj[2*n + 0].handle = scratch;
-
- reloc[n].target_handle = scratch;
- reloc[n].presumed_offset = 0;
- reloc[n].offset = sizeof(uint32_t);
- reloc[n].delta = n * sizeof(uint32_t);
- reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
- reloc[n].write_domain = 0; /* lies! */
+
+ obj[3*n + 0].handle = gem_create(fd, 4096);
+ reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
+ reloc[2*n + 0].presumed_offset = 0;
+ reloc[2*n + 0].offset = 4000;
+ reloc[2*n + 0].delta = 0;
+ reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+ obj[3*n + 1].handle = scratch;
+ reloc[2*n + 1].target_handle = scratch;
+ reloc[2*n + 1].presumed_offset = 0;
+ reloc[2*n + 1].offset = sizeof(uint32_t);
+ reloc[2*n + 1].delta = n * sizeof(uint32_t);
+ reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[2*n + 1].write_domain = 0; /* lies! */
   if (gen >= 4 && gen < 8)
- reloc[n].offset += sizeof(uint32_t);
+ reloc[2*n + 1].offset += sizeof(uint32_t);
   
- obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);

- obj[2*n + 1].relocation_count = 1;
+ obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
+ obj[3*n + 2].relocation_count = 2;
   }
   
   map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);

- for (unsigned loop = 1; loop <= MAX_LOOP; loop <<= 1) {
- unsigned count = loop * num_ctx;
+ for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
+ const unsigned int count = loop * num_ctx;
   uint32_t *all;
   
   all = malloc(count * sizeof(uint32_t));

- for (unsigned n = 0; n < count; n++)
+ for (unsigned int n = 0; n < count; n++)
  

Re: [Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-14 Thread Chris Wilson
Quoting Tvrtko Ursulin (2018-05-14 11:59:09)
> 
> On 14/05/2018 09:02, Chris Wilson wrote:
> > The test wrote to the same dwords from multiple contexts, assuming that
> > the writes would be ordered by its submission. However, as it was using
> > multiple contexts without a write hazard, those timelines are not
> > coupled and the requests may be emitted to hw in any order. So emit a
> > write hazard for each individual dword in the scratch (avoiding the
> > write hazard for the scratch as a whole) to ensure the writes do occur
> > in the expected order.
> > 
> > Signed-off-by: Chris Wilson 
> > ---
> >   tests/gem_ctx_thrash.c | 92 --
> >   1 file changed, 53 insertions(+), 39 deletions(-)
> > 
> > diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
> > index 2cd9cfebf..b25f95f13 100644
> > --- a/tests/gem_ctx_thrash.c
> > +++ b/tests/gem_ctx_thrash.c
> > @@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
> >   {
> >   struct drm_i915_gem_exec_object2 *obj;
> >   struct drm_i915_gem_relocation_entry *reloc;
> > - unsigned engines[16];
> > - uint64_t size;
> > - uint32_t *ctx, *map, scratch;
> > - unsigned num_ctx;
> > - int fd, gen, num_engines;
> > + unsigned int engines[16], num_engines, num_ctx;
> > + uint32_t *ctx, *map, scratch, size;
> > + int fd, gen;
> >   #define MAX_LOOP 16
> >   
> > - fd = drm_open_driver_master(DRIVER_INTEL);
> > + fd = drm_open_driver(DRIVER_INTEL);
> >   igt_require_gem(fd);
> > - igt_require(gem_can_store_dword(fd, 0));
> > -
> >   gem_require_contexts(fd);
> >   
> >   gen = intel_gen(intel_get_drm_devid(fd));
> > @@ -108,54 +104,77 @@ static void single(const char *name, bool all_engines)
> >   num_engines = 0;
> >   if (all_engines) {
> >   unsigned engine;
> > +
> >   for_each_physical_engine(fd, engine) {
> > + if (!gem_can_store_dword(fd, engine))
> > + continue;
> > +
> >   engines[num_engines++] = engine;
> >   if (num_engines == ARRAY_SIZE(engines))
> >   break;
> >   }
> > - } else
> > + } else {
> > + igt_require(gem_can_store_dword(fd, 0));
> >   engines[num_engines++] = 0;
> > + }
> > + igt_require(num_engines);
> >   
> >   num_ctx = get_num_contexts(fd, num_engines);
> >   
> >   size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
> > - scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
> > + scratch = gem_create(fd, size);
> >   gem_set_caching(fd, scratch, I915_CACHING_CACHED);
> > - obj = calloc(num_ctx, 2 * sizeof(*obj));
> > - reloc = calloc(num_ctx, sizeof(*reloc));
> > + obj = calloc(num_ctx, 3 * sizeof(*obj));
> > + reloc = calloc(num_ctx, 2 * sizeof(*reloc));
> >   
> >   ctx = malloc(num_ctx * sizeof(uint32_t));
> >   igt_assert(ctx);
> >   for (unsigned n = 0; n < num_ctx; n++) {
> >   ctx[n] = gem_context_create(fd);
> > - obj[2*n + 0].handle = scratch;
> > -
> > - reloc[n].target_handle = scratch;
> > - reloc[n].presumed_offset = 0;
> > - reloc[n].offset = sizeof(uint32_t);
> > - reloc[n].delta = n * sizeof(uint32_t);
> > - reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
> > - reloc[n].write_domain = 0; /* lies! */
> > +
> > + obj[3*n + 0].handle = gem_create(fd, 4096);
> > + reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
> > + reloc[2*n + 0].presumed_offset = 0;
> > + reloc[2*n + 0].offset = 4000;
> > + reloc[2*n + 0].delta = 0;
> > + reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
> > + reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
> > +
> > + obj[3*n + 1].handle = scratch;
> > + reloc[2*n + 1].target_handle = scratch;
> > + reloc[2*n + 1].presumed_offset = 0;
> > + reloc[2*n + 1].offset = sizeof(uint32_t);
> > + reloc[2*n + 1].delta = n * sizeof(uint32_t);
> > + reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
> > + reloc[2*n + 1].write_domain = 0; /* lies! */
> >   if (gen >= 4 && gen < 8)
> > - reloc[n].offset += sizeof(uint32_t);
> > + reloc[2*n + 1].offset += sizeof(uint32_t);
> >   
> > - obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);
> > - obj[2*n + 1].relocation_count = 1;
> > + obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
> > + obj[3*n + 2].relocation_count = 2;
> >   }
> >   
> >   map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);
> > - for (unsigned loop = 1; loop <= MAX_LOOP; loop <<= 1) {
> > - 

Re: [Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-14 Thread Tvrtko Ursulin


On 14/05/2018 09:02, Chris Wilson wrote:

The test wrote to the same dwords from multiple contexts, assuming that
the writes would be ordered by its submission. However, as it was using
multiple contexts without a write hazard, those timelines are not
coupled and the requests may be emitted to hw in any order. So emit a
write hazard for each individual dword in the scratch (avoiding the
write hazard for the scratch as a whole) to ensure the writes do occur
in the expected order.

Signed-off-by: Chris Wilson 
---
  tests/gem_ctx_thrash.c | 92 --
  1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
index 2cd9cfebf..b25f95f13 100644
--- a/tests/gem_ctx_thrash.c
+++ b/tests/gem_ctx_thrash.c
@@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
  {
struct drm_i915_gem_exec_object2 *obj;
struct drm_i915_gem_relocation_entry *reloc;
-   unsigned engines[16];
-   uint64_t size;
-   uint32_t *ctx, *map, scratch;
-   unsigned num_ctx;
-   int fd, gen, num_engines;
+   unsigned int engines[16], num_engines, num_ctx;
+   uint32_t *ctx, *map, scratch, size;
+   int fd, gen;
  #define MAX_LOOP 16
  
-	fd = drm_open_driver_master(DRIVER_INTEL);

+   fd = drm_open_driver(DRIVER_INTEL);
igt_require_gem(fd);
-   igt_require(gem_can_store_dword(fd, 0));
-
gem_require_contexts(fd);
  
  	gen = intel_gen(intel_get_drm_devid(fd));

@@ -108,54 +104,77 @@ static void single(const char *name, bool all_engines)
num_engines = 0;
if (all_engines) {
unsigned engine;
+
for_each_physical_engine(fd, engine) {
+   if (!gem_can_store_dword(fd, engine))
+   continue;
+
engines[num_engines++] = engine;
if (num_engines == ARRAY_SIZE(engines))
break;
}
-   } else
+   } else {
+   igt_require(gem_can_store_dword(fd, 0));
engines[num_engines++] = 0;
+   }
+   igt_require(num_engines);
  
  	num_ctx = get_num_contexts(fd, num_engines);
  
  	size = ALIGN(num_ctx * sizeof(uint32_t), 4096);

-   scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
+   scratch = gem_create(fd, size);
gem_set_caching(fd, scratch, I915_CACHING_CACHED);
-   obj = calloc(num_ctx, 2 * sizeof(*obj));
-   reloc = calloc(num_ctx, sizeof(*reloc));
+   obj = calloc(num_ctx, 3 * sizeof(*obj));
+   reloc = calloc(num_ctx, 2 * sizeof(*reloc));
  
  	ctx = malloc(num_ctx * sizeof(uint32_t));

igt_assert(ctx);
for (unsigned n = 0; n < num_ctx; n++) {
ctx[n] = gem_context_create(fd);
-   obj[2*n + 0].handle = scratch;
-
-   reloc[n].target_handle = scratch;
-   reloc[n].presumed_offset = 0;
-   reloc[n].offset = sizeof(uint32_t);
-   reloc[n].delta = n * sizeof(uint32_t);
-   reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[n].write_domain = 0; /* lies! */
+
+   obj[3*n + 0].handle = gem_create(fd, 4096);
+   reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
+   reloc[2*n + 0].presumed_offset = 0;
+   reloc[2*n + 0].offset = 4000;
+   reloc[2*n + 0].delta = 0;
+   reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+   obj[3*n + 1].handle = scratch;
+   reloc[2*n + 1].target_handle = scratch;
+   reloc[2*n + 1].presumed_offset = 0;
+   reloc[2*n + 1].offset = sizeof(uint32_t);
+   reloc[2*n + 1].delta = n * sizeof(uint32_t);
+   reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[2*n + 1].write_domain = 0; /* lies! */
if (gen >= 4 && gen < 8)
-   reloc[n].offset += sizeof(uint32_t);
+   reloc[2*n + 1].offset += sizeof(uint32_t);
  
-		obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);

-   obj[2*n + 1].relocation_count = 1;
+   obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
+   obj[3*n + 2].relocation_count = 2;
}
  
  	map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);

-   for (unsigned loop = 1; loop <= MAX_LOOP; loop <<= 1) {
-   unsigned count = loop * num_ctx;
+   for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
+   const unsigned int count = loop * num_ctx;
uint32_t *all;
  
  		all = malloc(count * sizeof(uint32_t));

-   for (unsigned n = 0; n < count; n++)
+   for (unsigned int n = 0; n < count; n++)

[Intel-gfx] [PATCH i-g-t 3/6] igt/gem_ctx_thrash: Order writes between contexts

2018-05-14 Thread Chris Wilson
The test wrote to the same dwords from multiple contexts, assuming that
the writes would be ordered by its submission. However, as it was using
multiple contexts without a write hazard, those timelines are not
coupled and the requests may be emitted to hw in any order. So emit a
write hazard for each individual dword in the scratch (avoiding the
write hazard for the scratch as a whole) to ensure the writes do occur
in the expected order.

Signed-off-by: Chris Wilson 
---
 tests/gem_ctx_thrash.c | 92 --
 1 file changed, 53 insertions(+), 39 deletions(-)

diff --git a/tests/gem_ctx_thrash.c b/tests/gem_ctx_thrash.c
index 2cd9cfebf..b25f95f13 100644
--- a/tests/gem_ctx_thrash.c
+++ b/tests/gem_ctx_thrash.c
@@ -90,17 +90,13 @@ static void single(const char *name, bool all_engines)
 {
struct drm_i915_gem_exec_object2 *obj;
struct drm_i915_gem_relocation_entry *reloc;
-   unsigned engines[16];
-   uint64_t size;
-   uint32_t *ctx, *map, scratch;
-   unsigned num_ctx;
-   int fd, gen, num_engines;
+   unsigned int engines[16], num_engines, num_ctx;
+   uint32_t *ctx, *map, scratch, size;
+   int fd, gen;
 #define MAX_LOOP 16
 
-   fd = drm_open_driver_master(DRIVER_INTEL);
+   fd = drm_open_driver(DRIVER_INTEL);
igt_require_gem(fd);
-   igt_require(gem_can_store_dword(fd, 0));
-
gem_require_contexts(fd);
 
gen = intel_gen(intel_get_drm_devid(fd));
@@ -108,54 +104,77 @@ static void single(const char *name, bool all_engines)
num_engines = 0;
if (all_engines) {
unsigned engine;
+
for_each_physical_engine(fd, engine) {
+   if (!gem_can_store_dword(fd, engine))
+   continue;
+
engines[num_engines++] = engine;
if (num_engines == ARRAY_SIZE(engines))
break;
}
-   } else
+   } else {
+   igt_require(gem_can_store_dword(fd, 0));
engines[num_engines++] = 0;
+   }
+   igt_require(num_engines);
 
num_ctx = get_num_contexts(fd, num_engines);
 
size = ALIGN(num_ctx * sizeof(uint32_t), 4096);
-   scratch = gem_create(fd, ALIGN(num_ctx * sizeof(uint32_t), 4096));
+   scratch = gem_create(fd, size);
gem_set_caching(fd, scratch, I915_CACHING_CACHED);
-   obj = calloc(num_ctx, 2 * sizeof(*obj));
-   reloc = calloc(num_ctx, sizeof(*reloc));
+   obj = calloc(num_ctx, 3 * sizeof(*obj));
+   reloc = calloc(num_ctx, 2 * sizeof(*reloc));
 
ctx = malloc(num_ctx * sizeof(uint32_t));
igt_assert(ctx);
for (unsigned n = 0; n < num_ctx; n++) {
ctx[n] = gem_context_create(fd);
-   obj[2*n + 0].handle = scratch;
-
-   reloc[n].target_handle = scratch;
-   reloc[n].presumed_offset = 0;
-   reloc[n].offset = sizeof(uint32_t);
-   reloc[n].delta = n * sizeof(uint32_t);
-   reloc[n].read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-   reloc[n].write_domain = 0; /* lies! */
+
+   obj[3*n + 0].handle = gem_create(fd, 4096);
+   reloc[2*n + 0].target_handle = obj[3*n + 0].handle;
+   reloc[2*n + 0].presumed_offset = 0;
+   reloc[2*n + 0].offset = 4000;
+   reloc[2*n + 0].delta = 0;
+   reloc[2*n + 0].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[2*n + 0].write_domain = I915_GEM_DOMAIN_RENDER;
+
+   obj[3*n + 1].handle = scratch;
+   reloc[2*n + 1].target_handle = scratch;
+   reloc[2*n + 1].presumed_offset = 0;
+   reloc[2*n + 1].offset = sizeof(uint32_t);
+   reloc[2*n + 1].delta = n * sizeof(uint32_t);
+   reloc[2*n + 1].read_domains = I915_GEM_DOMAIN_RENDER;
+   reloc[2*n + 1].write_domain = 0; /* lies! */
if (gen >= 4 && gen < 8)
-   reloc[n].offset += sizeof(uint32_t);
+   reloc[2*n + 1].offset += sizeof(uint32_t);
 
-   obj[2*n + 1].relocs_ptr = to_user_pointer(&reloc[n]);
-   obj[2*n + 1].relocation_count = 1;
+   obj[3*n + 2].relocs_ptr = to_user_pointer(&reloc[2*n]);
+   obj[3*n + 2].relocation_count = 2;
}
 
map = gem_mmap__cpu(fd, scratch, 0, size, PROT_WRITE);
-   for (unsigned loop = 1; loop <= MAX_LOOP; loop <<= 1) {
-   unsigned count = loop * num_ctx;
+   for (unsigned int loop = 1; loop <= MAX_LOOP; loop <<= 1) {
+   const unsigned int count = loop * num_ctx;
uint32_t *all;
 
all = malloc(count * sizeof(uint32_t));
-   for (unsigned n = 0; n < count; n++)
+   for (unsigned int n = 0; n < count; n++)
all[n] =