[Intel-gfx] [PATCH v3 2/3] igt/gem_wait: Use new igt_spin_batch

2016-11-03 Thread Abdiel Janulgue
Cc: Chris Wilson 
Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_wait.c | 125 ---
 1 file changed, 7 insertions(+), 118 deletions(-)

diff --git a/tests/gem_wait.c b/tests/gem_wait.c
index b4127de..785bb14 100644
--- a/tests/gem_wait.c
+++ b/tests/gem_wait.c
@@ -27,18 +27,6 @@
 
 #include "igt.h"
 
-#include 
-#include 
-#include 
-
-#define gettid() syscall(__NR_gettid)
-#define sigev_notify_thread_id _sigev_un._tid
-
-#define LOCAL_I915_EXEC_BSD_SHIFT  (13)
-#define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
-
-#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
-
 static int __gem_wait(int fd, struct drm_i915_gem_wait *w)
 {
int err;
@@ -75,14 +63,6 @@ static void invalid_buf(int fd)
igt_assert_eq(__gem_wait(fd, &wait), -ENOENT);
 }
 
-static uint32_t *batch;
-
-static void sigiter(int sig, siginfo_t *info, void *arg)
-{
-   *batch = MI_BATCH_BUFFER_END;
-   __sync_synchronize();
-}
-
 #define MSEC_PER_SEC (1000)
 #define USEC_PER_SEC (1000 * MSEC_PER_SEC)
 #define NSEC_PER_SEC (1000 * USEC_PER_SEC)
@@ -91,113 +71,26 @@ static void sigiter(int sig, siginfo_t *info, void *arg)
 #define HANG 2
 static void basic(int fd, unsigned engine, unsigned flags)
 {
-   const int gen = intel_gen(intel_get_drm_devid(fd));
-   struct drm_i915_gem_exec_object2 obj;
-   struct drm_i915_gem_relocation_entry reloc;
-   struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_wait wait;
-   unsigned engines[16];
-   unsigned nengine;
-   int i, timeout;
-
-   nengine = 0;
-   if (engine == -1) {
-   for_each_engine(fd, engine)
-   if (engine) engines[nengine++] = engine;
-   } else {
-   igt_require(gem_has_ring(fd, engine));
-   engines[nengine++] = engine;
-   }
-   igt_require(nengine);
-
-   memset(&execbuf, 0, sizeof(execbuf));
-   execbuf.buffers_ptr = (uintptr_t)&obj;
-   execbuf.buffer_count = 1;
-
-   memset(&obj, 0, sizeof(obj));
-   obj.handle = gem_create(fd, 4096);
-
-   obj.relocs_ptr = (uintptr_t)&reloc;
-   obj.relocation_count = 1;
-   memset(&reloc, 0, sizeof(reloc));
-
-   batch = gem_mmap__gtt(fd, obj.handle, 4096, PROT_WRITE);
-   gem_set_domain(fd, obj.handle,
-   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
-
-   reloc.target_handle = obj.handle; /* recurse */
-   reloc.presumed_offset = 0;
-   reloc.offset = sizeof(uint32_t);
-   reloc.delta = 0;
-   reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
-   reloc.write_domain = 0;
-
-   i = 0;
-   batch[i] = MI_BATCH_BUFFER_START;
-   if (gen >= 8) {
-   batch[i] |= 1 << 8 | 1;
-   batch[++i] = 0;
-   batch[++i] = 0;
-   } else if (gen >= 6) {
-   batch[i] |= 1 << 8;
-   batch[++i] = 0;
-   } else {
-   batch[i] |= 2 << 6;
-   batch[++i] = 0;
-   if (gen < 4) {
-   batch[i] |= 1;
-   reloc.delta = 1;
-   }
-   }
-
-   for (i = 0; i < nengine; i++) {
-   execbuf.flags &= ~ENGINE_MASK;
-   execbuf.flags |= engines[i];
-   gem_execbuf(fd, &execbuf);
-   }
+   int wait_s = (flags == 0) ? NSEC_PER_SEC : 0;
+   wait_s = ((flags & HANG) == 0) ? wait_s : -1;
+   igt_spin_t spin = igt_spin_batch(fd, wait_s, engine, 0);
+   int timeout;
 
memset(&wait, 0, sizeof(wait));
-   wait.bo_handle = obj.handle;
-   igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+   wait.bo_handle = spin.handle;
 
if (flags & BUSY) {
struct timespec tv;
 
timeout = 120;
-   if ((flags & HANG) == 0) {
-   *batch = MI_BATCH_BUFFER_END;
-   __sync_synchronize();
+   if ((flags & HANG) == 0)
timeout = 1;
-   }
 
memset(&tv, 0, sizeof(tv));
while (__gem_wait(fd, &wait) == -ETIME)
igt_assert(igt_seconds_elapsed(&tv) < timeout);
} else {
-   timer_t timer;
-
-   if ((flags & HANG) == 0) {
-   struct sigevent sev;
-   struct sigaction act;
-   struct itimerspec its;
-
-   memset(&sev, 0, sizeof(sev));
-   sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
-   sev.sigev_notify_thread_id = gettid();
-   sev.sigev_signo = SIGRTMIN + 1;
-   igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) 
== 0);
-
-

[Intel-gfx] [PATCH v3 1/3] lib: add igt_dummyload

2016-11-03 Thread Abdiel Janulgue
A lot of igt testcases need some GPU workload to make sure a race
window is big enough. Unfortunately having a fixed amount of
workload leads to spurious test failures or overtly long runtimes
on some fast/slow platforms. This library contains functionality
to submit GPU workloads that should consume exactly a specific
amount of time.

v2 : Add recursive batch feature from Chris
v3 : Drop auto-tuned stuff. Add bo dependecy to recursive batch
 by adding a dummy reloc to the bo as suggested by Ville.

Cc: Daniel Vetter 
Cc: Ville Syrjälä 
Cc: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 lib/Makefile.sources |   2 +
 lib/igt.h|   1 +
 lib/igt_dummyload.c  | 274 +++
 lib/igt_dummyload.h  |  42 
 4 files changed, 319 insertions(+)
 create mode 100644 lib/igt_dummyload.c
 create mode 100644 lib/igt_dummyload.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index e8e277b..7fc5ec2 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -75,6 +75,8 @@ lib_source_list = \
igt_draw.h  \
igt_pm.c\
igt_pm.h\
+   igt_dummyload.c \
+   igt_dummyload.h \
uwildmat/uwildmat.h \
uwildmat/uwildmat.c \
$(NULL)
diff --git a/lib/igt.h b/lib/igt.h
index d751f24..a0028d5 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -32,6 +32,7 @@
 #include "igt_core.h"
 #include "igt_debugfs.h"
 #include "igt_draw.h"
+#include "igt_dummyload.h"
 #include "igt_fb.h"
 #include "igt_gt.h"
 #include "igt_kms.h"
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
new file mode 100644
index 000..d37a30b
--- /dev/null
+++ b/lib/igt_dummyload.c
@@ -0,0 +1,274 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+#include 
+#include 
+#include 
+
+/**
+ * SECTION:igt_dummyload
+ * @short_description: Library for submitting GPU workloads
+ * @title: Dummyload
+ * @include: igt.h
+ *
+ * A lot of igt testcases need some GPU workload to make sure a race window is
+ * big enough. Unfortunately having a fixed amount of workload leads to
+ * spurious test failures or overtly long runtimes on some fast/slow platforms.
+ * This library contains functionality to submit GPU workloads that should
+ * consume exactly a specific amount of time.
+ */
+
+#define NSEC_PER_SEC 10L
+
+#define gettid() syscall(__NR_gettid)
+#define sigev_notify_thread_id _sigev_un._tid
+
+#define LOCAL_I915_EXEC_BSD_SHIFT  (13)
+#define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+static void
+fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle,
+   struct drm_i915_gem_relocation_entry *relocs, uint32_t count)
+{
+   memset(obj, 0, sizeof(*obj));
+   obj->handle = gem_handle;
+   obj->relocation_count = count;
+   obj->relocs_ptr = (uintptr_t)relocs;
+}
+
+static void
+fill_reloc(struct drm_i915_gem_relocation_entry *reloc,
+  uint32_t gem_handle, uint32_t offset,
+  uint32_t read_domains, uint32_t write_domains)
+{
+   reloc->target_handle = gem_handle;
+   reloc->delta = 0;
+   reloc->offset = offset * sizeof(uint32_t);
+   reloc->presumed_offset = 0;
+   reloc->read_domains = read_domains;
+   reloc->write_domain = write_domains;
+}
+
+
+static uint32_t *batch;
+
+static uint32_t emit_recursive_batch(int fd, int engine, unsigned dep_handle)
+{
+   const int gen = intel_gen(intel_get_drm_devid(fd));
+   struct drm_i915_gem_exec_object2 obj[2];
+   struct d

Re: [Intel-gfx] [PATCH 3/3] igt/kms_flip: Use new igt_spin_batch

2016-10-30 Thread Abdiel Janulgue


On 10/28/2016 07:02 PM, Ville Syrjälä wrote:
> On Fri, Oct 28, 2016 at 06:47:26PM +0300, Abdiel Janulgue wrote:
>> Cc: Chris Wilson 
>> Cc: Daniel Vetter 
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  tests/kms_flip.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/tests/kms_flip.c b/tests/kms_flip.c
>> index 9829b35..13cb262 100644
>> --- a/tests/kms_flip.c
>> +++ b/tests/kms_flip.c
>> @@ -866,10 +866,10 @@ static unsigned int run_test_step(struct test_output 
>> *o)
>>  o->current_fb_id = !o->current_fb_id;
>>  
>>  if (o->flags & TEST_WITH_DUMMY_BCS)
>> -emit_dummy_load__bcs(o, 1);
>> +igt_spin_batch_wait(drm_fd, 1, I915_EXEC_BLT);
>>  
>>  if (o->flags & TEST_WITH_DUMMY_RCS)
>> -emit_dummy_load__rcs(o, 1);
>> +igt_spin_batch_wait(drm_fd, 1, I915_EXEC_RENDER);
> 
> NAK That's not going to add the required dependency between the load
> and the bo used by the test.

Right. For these cases would it be more straightforward to stick to the
auto-tuned rendercopy/blit load operations on the bo instead of
inserting a recursive batch on these situations? Any ideas?

>>  
>>  if (o->flags & TEST_FB_RECREATE)
>>  recreate_fb(o);
>> -- 
>> 2.7.0
>>
>> ___
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/3] igt/gem_wait: Use new igt_spin_batch

2016-10-28 Thread Abdiel Janulgue
Cc: Chris Wilson 
Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_wait.c | 125 ---
 1 file changed, 7 insertions(+), 118 deletions(-)

diff --git a/tests/gem_wait.c b/tests/gem_wait.c
index b4127de..630058c 100644
--- a/tests/gem_wait.c
+++ b/tests/gem_wait.c
@@ -27,18 +27,6 @@
 
 #include "igt.h"
 
-#include 
-#include 
-#include 
-
-#define gettid() syscall(__NR_gettid)
-#define sigev_notify_thread_id _sigev_un._tid
-
-#define LOCAL_I915_EXEC_BSD_SHIFT  (13)
-#define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
-
-#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
-
 static int __gem_wait(int fd, struct drm_i915_gem_wait *w)
 {
int err;
@@ -75,14 +63,6 @@ static void invalid_buf(int fd)
igt_assert_eq(__gem_wait(fd, &wait), -ENOENT);
 }
 
-static uint32_t *batch;
-
-static void sigiter(int sig, siginfo_t *info, void *arg)
-{
-   *batch = MI_BATCH_BUFFER_END;
-   __sync_synchronize();
-}
-
 #define MSEC_PER_SEC (1000)
 #define USEC_PER_SEC (1000 * MSEC_PER_SEC)
 #define NSEC_PER_SEC (1000 * USEC_PER_SEC)
@@ -91,113 +71,26 @@ static void sigiter(int sig, siginfo_t *info, void *arg)
 #define HANG 2
 static void basic(int fd, unsigned engine, unsigned flags)
 {
-   const int gen = intel_gen(intel_get_drm_devid(fd));
-   struct drm_i915_gem_exec_object2 obj;
-   struct drm_i915_gem_relocation_entry reloc;
-   struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_wait wait;
-   unsigned engines[16];
-   unsigned nengine;
-   int i, timeout;
-
-   nengine = 0;
-   if (engine == -1) {
-   for_each_engine(fd, engine)
-   if (engine) engines[nengine++] = engine;
-   } else {
-   igt_require(gem_has_ring(fd, engine));
-   engines[nengine++] = engine;
-   }
-   igt_require(nengine);
-
-   memset(&execbuf, 0, sizeof(execbuf));
-   execbuf.buffers_ptr = (uintptr_t)&obj;
-   execbuf.buffer_count = 1;
-
-   memset(&obj, 0, sizeof(obj));
-   obj.handle = gem_create(fd, 4096);
-
-   obj.relocs_ptr = (uintptr_t)&reloc;
-   obj.relocation_count = 1;
-   memset(&reloc, 0, sizeof(reloc));
-
-   batch = gem_mmap__gtt(fd, obj.handle, 4096, PROT_WRITE);
-   gem_set_domain(fd, obj.handle,
-   I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
-
-   reloc.target_handle = obj.handle; /* recurse */
-   reloc.presumed_offset = 0;
-   reloc.offset = sizeof(uint32_t);
-   reloc.delta = 0;
-   reloc.read_domains = I915_GEM_DOMAIN_COMMAND;
-   reloc.write_domain = 0;
-
-   i = 0;
-   batch[i] = MI_BATCH_BUFFER_START;
-   if (gen >= 8) {
-   batch[i] |= 1 << 8 | 1;
-   batch[++i] = 0;
-   batch[++i] = 0;
-   } else if (gen >= 6) {
-   batch[i] |= 1 << 8;
-   batch[++i] = 0;
-   } else {
-   batch[i] |= 2 << 6;
-   batch[++i] = 0;
-   if (gen < 4) {
-   batch[i] |= 1;
-   reloc.delta = 1;
-   }
-   }
-
-   for (i = 0; i < nengine; i++) {
-   execbuf.flags &= ~ENGINE_MASK;
-   execbuf.flags |= engines[i];
-   gem_execbuf(fd, &execbuf);
-   }
+   int wait_s = (flags == 0) ? 1 : 0;
+   wait_s = ((flags & HANG) == 0) ? wait_s : -1;
+   igt_spin_t spin = igt_spin_batch(fd, wait_s, engine);
+   int timeout;
 
memset(&wait, 0, sizeof(wait));
-   wait.bo_handle = obj.handle;
-   igt_assert_eq(__gem_wait(fd, &wait), -ETIME);
+   wait.bo_handle = spin.handle;
 
if (flags & BUSY) {
struct timespec tv;
 
timeout = 120;
-   if ((flags & HANG) == 0) {
-   *batch = MI_BATCH_BUFFER_END;
-   __sync_synchronize();
+   if ((flags & HANG) == 0)
timeout = 1;
-   }
 
memset(&tv, 0, sizeof(tv));
while (__gem_wait(fd, &wait) == -ETIME)
igt_assert(igt_seconds_elapsed(&tv) < timeout);
} else {
-   timer_t timer;
-
-   if ((flags & HANG) == 0) {
-   struct sigevent sev;
-   struct sigaction act;
-   struct itimerspec its;
-
-   memset(&sev, 0, sizeof(sev));
-   sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
-   sev.sigev_notify_thread_id = gettid();
-   sev.sigev_signo = SIGRTMIN + 1;
-   igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) 
== 0);
-
- 

[Intel-gfx] [PATCH 3/3] igt/kms_flip: Use new igt_spin_batch

2016-10-28 Thread Abdiel Janulgue
Cc: Chris Wilson 
Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/kms_flip.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/kms_flip.c b/tests/kms_flip.c
index 9829b35..13cb262 100644
--- a/tests/kms_flip.c
+++ b/tests/kms_flip.c
@@ -866,10 +866,10 @@ static unsigned int run_test_step(struct test_output *o)
o->current_fb_id = !o->current_fb_id;
 
if (o->flags & TEST_WITH_DUMMY_BCS)
-   emit_dummy_load__bcs(o, 1);
+   igt_spin_batch_wait(drm_fd, 1, I915_EXEC_BLT);
 
if (o->flags & TEST_WITH_DUMMY_RCS)
-   emit_dummy_load__rcs(o, 1);
+   igt_spin_batch_wait(drm_fd, 1, I915_EXEC_RENDER);
 
if (o->flags & TEST_FB_RECREATE)
recreate_fb(o);
-- 
2.7.0

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] lib: add igt_dummyload

2016-10-28 Thread Abdiel Janulgue
Generalized from auto-tuned GPU dummy workload in gem_wait and kms_flip

v2 : Add recursive batch feature from Chris

Cc: Chris Wilson 
Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 lib/Makefile.sources |   2 +
 lib/igt.h|   1 +
 lib/igt_dummyload.c  | 613 +++
 lib/igt_dummyload.h  |  77 +++
 4 files changed, 693 insertions(+)
 create mode 100644 lib/igt_dummyload.c
 create mode 100644 lib/igt_dummyload.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index e8e277b..7fc5ec2 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -75,6 +75,8 @@ lib_source_list = \
igt_draw.h  \
igt_pm.c\
igt_pm.h\
+   igt_dummyload.c \
+   igt_dummyload.h \
uwildmat/uwildmat.h \
uwildmat/uwildmat.c \
$(NULL)
diff --git a/lib/igt.h b/lib/igt.h
index d751f24..a0028d5 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -32,6 +32,7 @@
 #include "igt_core.h"
 #include "igt_debugfs.h"
 #include "igt_draw.h"
+#include "igt_dummyload.h"
 #include "igt_fb.h"
 #include "igt_gt.h"
 #include "igt_kms.h"
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
new file mode 100644
index 000..f7a64b7
--- /dev/null
+++ b/lib/igt_dummyload.c
@@ -0,0 +1,613 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+#include 
+#include 
+#include 
+
+/**
+ * SECTION:igt_dummyload
+ * @short_description: Library for submitting GPU workloads
+ * @title: Dummyload
+ * @include: igt.h
+ *
+ * A lot of igt testcases need some GPU workload to make sure a race window is
+ * big enough. Unfortunately having a fixed amount of workload leads to
+ * spurious test failures or overtly long runtimes on some fast/slow platforms.
+ * This library contains functionality to submit GPU workloads that should
+ * consume exactly a specific amount of time.
+ */
+
+#define USEC_PER_SEC 100L
+#define NSEC_PER_SEC 10L
+
+#define gettid() syscall(__NR_gettid)
+#define sigev_notify_thread_id _sigev_un._tid
+
+#define LOCAL_I915_EXEC_BSD_SHIFT  (13)
+#define LOCAL_I915_EXEC_BSD_MASK   (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_MASK  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+
+/* Internal data structures to avoid having to pass tons of parameters
+ * around. */
+struct dummy_info {
+   drm_intel_bufmgr *bufmgr;
+   struct intel_batchbuffer *batch;
+   int drm_fd;
+   uint32_t buf_handle;
+   uint32_t buf_stride;
+   uint32_t buf_tiling;
+   int fb_width;
+   int fb_height;
+};
+
+static void blit_copy(struct intel_batchbuffer *batch,
+ drm_intel_bo *dst, drm_intel_bo *src,
+ unsigned int width, unsigned int height,
+ unsigned int dst_pitch, unsigned int src_pitch)
+{
+   BLIT_COPY_BATCH_START(0);
+   OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+   OUT_BATCH(0 << 16 | 0);
+   OUT_BATCH(height << 16 | width);
+   OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 
0);
+   OUT_BATCH(0 << 16 | 0);
+   OUT_BATCH(src_pitch);
+   OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+   ADVANCE_BATCH();
+
+   if (batch->gen >= 6) {
+   BEGIN_BATCH(3, 0);
+   OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   }
+}
+
+static void blit_fill(struct intel_batchbuffe

[Intel-gfx] [RFC v2 i-g-t] Auto-tuned dummyload + recursive batch

2016-10-28 Thread Abdiel Janulgue
A lot of igt testcases need some GPU workload to make sure a race
window is big enough. Unfortunately having a fixed amount of
workload leads to spurious test failures or overtly long runtimes
on some fast/slow platforms. This library contains functionality
to submit GPU workloads that should consume exactly a specific
amount of time.

v2: Add recursive batch feature from Chris and use in gem_wait and
kms_flip. I've retained the previous auto-tuned dummy load
functions. Let me know if we need to drop those.

Abdiel Janulgue (3):
  lib: add igt_dummyload
  igt/gem_wait: Use new igt_spin_batch
  igt/kms_flip: Use new igt_spin_batch

 lib/Makefile.sources |   2 +
 lib/igt.h|   1 +
 lib/igt_dummyload.c  | 613 +++
 lib/igt_dummyload.h  |  77 
 tests/gem_wait.c | 125 +-
 tests/kms_flip.c |   4 +-


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api

2016-10-14 Thread Abdiel Janulgue


On 10/13/2016 06:17 PM, Daniel Vetter wrote:
> On Thu, Oct 13, 2016 at 10:49:39AM +0100, Chris Wilson wrote:
>> On Thu, Oct 13, 2016 at 12:31:13PM +0300, Abdiel Janulgue wrote:
>>>
>>>
>>> On 10/12/2016 03:07 PM, Chris Wilson wrote:
>>>> On Wed, Oct 12, 2016 at 02:59:53PM +0300, Abdiel Janulgue wrote:
>>>>> Signed-off-by: Abdiel Janulgue 
>>>>> ---
>>>>>  tests/gem_wait.c | 77 
>>>>> +---
>>>>>  1 file changed, 12 insertions(+), 65 deletions(-)
>>>>
>>>> We can do so much better than a dummy load here. We can precisely
>>>> control how long we want the object to be busy by using a recursive
>>>> batch buffer (and terminating that batch at the exact moment we require).
>>>> -Chris
>>>>
>>> Hi Chris, I see you've posted a better solution to gem_wait. I could
>>> drop this one and defer to yours instead. So for now, igt_dummyload has
>>> dropped to only 1 customer at the moment: kms_flip.  Let me know whether
>>> it's possible to upstream this dummyload api.
>>
>> kms_flip would probably be better with a specific load rather than a
>> dummy as well. The challenge is whether the flip works given various
>> input states of the framebuffers, and the more control we have over
>> those inputs the better.
> 
> Oh yeah, this is a pretty sweet idea with a spinning batch that we
> manually terminate. Assuming it works everywhere I think this is a much
> better approach, and by submitting different workloads we can always put
> the delay workload exactly where we want.
> 
> Abdiel, can you pls update the JIRA to instead extract Chris' trick into a
> library (pretty sure Chris can help with bikeshedding the interface to
> make it all pretty) and then roll that one out? Being able to control the
> time used by delay tasks down to jiffies is real sweet.

Ok I'll try that. Thanks!

-Abdiel

> 
> Also there might be some space to reuse this with Mika's hangcheck stuff,
> not sure.
> -Daniel
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api

2016-10-13 Thread Abdiel Janulgue


On 10/12/2016 03:07 PM, Chris Wilson wrote:
> On Wed, Oct 12, 2016 at 02:59:53PM +0300, Abdiel Janulgue wrote:
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  tests/gem_wait.c | 77 
>> +---
>>  1 file changed, 12 insertions(+), 65 deletions(-)
> 
> We can do so much better than a dummy load here. We can precisely
> control how long we want the object to be busy by using a recursive
> batch buffer (and terminating that batch at the exact moment we require).
> -Chris
> 
Hi Chris, I see you've posted a better solution to gem_wait. I could
drop this one and defer to yours instead. So for now, igt_dummyload has
dropped to only 1 customer at the moment: kms_flip.  Let me know whether
it's possible to upstream this dummyload api.

-Abdiel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC i-g-t PATCH 1/3] lib: add igt_dummyload

2016-10-12 Thread Abdiel Janulgue
Generalized from auto-tuned GPU dummy workload in gem_wait and kms_flip

Signed-off-by: Abdiel Janulgue 
---
 lib/Makefile.sources |   2 +
 lib/igt.h|   1 +
 lib/igt_dummyload.c  | 419 +++
 lib/igt_dummyload.h  |  63 
 4 files changed, 485 insertions(+)
 create mode 100644 lib/igt_dummyload.c
 create mode 100644 lib/igt_dummyload.h

diff --git a/lib/Makefile.sources b/lib/Makefile.sources
index e8e277b..7fc5ec2 100644
--- a/lib/Makefile.sources
+++ b/lib/Makefile.sources
@@ -75,6 +75,8 @@ lib_source_list = \
igt_draw.h  \
igt_pm.c\
igt_pm.h\
+   igt_dummyload.c \
+   igt_dummyload.h \
uwildmat/uwildmat.h \
uwildmat/uwildmat.c \
$(NULL)
diff --git a/lib/igt.h b/lib/igt.h
index d751f24..a0028d5 100644
--- a/lib/igt.h
+++ b/lib/igt.h
@@ -32,6 +32,7 @@
 #include "igt_core.h"
 #include "igt_debugfs.h"
 #include "igt_draw.h"
+#include "igt_dummyload.h"
 #include "igt_fb.h"
 #include "igt_gt.h"
 #include "igt_kms.h"
diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c
new file mode 100644
index 000..908d839
--- /dev/null
+++ b/lib/igt_dummyload.c
@@ -0,0 +1,419 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include "igt_dummyload.h"
+#include 
+
+/**
+ * SECTION:igt_dummyload
+ * @short_description: Library for submitting auto-tuned dummy GPU workloads
+ * @title: Dummyload
+ * @include: igt.h
+ *
+ * A lot of igt testcases need some dummy load to make sure a race window is
+ * big enough. Unfortunately having a fixed amount of workload leads to
+ * spurious test failures or overtly long runtimes on some fast/slow platforms.
+ * This library contains functionality to submit GPU workloads that is
+ * dynamically tuned to consume a specific amount of time.
+ */
+
+#define USEC_PER_SEC 100L
+#define NSEC_PER_SEC 10L
+
+/* Internal data structures to avoid having to pass tons of parameters
+ * around. */
+struct dummy_info {
+   drm_intel_bufmgr *bufmgr;
+   struct intel_batchbuffer *batch;
+   int drm_fd;
+   uint32_t buf_handle;
+   uint32_t buf_stride;
+   uint32_t buf_tiling;
+   int fb_width;
+   int fb_height;
+};
+
+static void blit_copy(struct intel_batchbuffer *batch,
+ drm_intel_bo *dst, drm_intel_bo *src,
+ unsigned int width, unsigned int height,
+ unsigned int dst_pitch, unsigned int src_pitch)
+{
+   BLIT_COPY_BATCH_START(0);
+   OUT_BATCH((3 << 24) | /* 32 bits */
+ (0xcc << 16) | /* copy ROP */
+ dst_pitch);
+   OUT_BATCH(0 << 16 | 0);
+   OUT_BATCH(height << 16 | width);
+   OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 
0);
+   OUT_BATCH(0 << 16 | 0);
+   OUT_BATCH(src_pitch);
+   OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+   ADVANCE_BATCH();
+
+   if (batch->gen >= 6) {
+   BEGIN_BATCH(3, 0);
+   OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+   }
+}
+
+static void blit_fill(struct intel_batchbuffer *batch, drm_intel_bo *dst,
+ unsigned int width, unsigned int height)
+{
+   COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA |
+   XY_COLOR_BLT_WRITE_RGB);
+   OUT_BATCH((3 << 24) | /* 32 Bit Color */
+ (0xF0 << 16)  | /* Raster OP copy background register */
+ 0);

[Intel-gfx] [RFC i-g-t PATCH 3/3] igt/kms_flip: Use new igt_dummyload api

2016-10-12 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 tests/kms_flip.c | 191 +++
 1 file changed, 10 insertions(+), 181 deletions(-)

diff --git a/tests/kms_flip.c b/tests/kms_flip.c
index 065ad66..93cf391 100644
--- a/tests/kms_flip.c
+++ b/tests/kms_flip.c
@@ -187,109 +187,6 @@ static unsigned long gettime_us(void)
return ts.tv_sec * 100 + ts.tv_nsec / 1000;
 }
 
-static int calibrate_dummy_load(struct test_output *o,
-   const char *ring_name,
-   int (*emit)(struct test_output *o, int limit, 
int timeout))
-{
-   unsigned long start;
-   int ops = 1;
-
-   start = gettime_us();
-
-   do {
-   unsigned long diff;
-   int ret;
-
-   ret = emit(o, (ops+1)/2, 10);
-   diff = gettime_us() - start;
-
-   if (ret || diff / USEC_PER_SEC >= 1)
-   break;
-
-   ops += ops;
-   } while (ops < 10);
-
-   igt_debug("%s dummy load calibrated: %d operations / second\n",
- ring_name, ops);
-
-   return ops;
-}
-
-static void blit_copy(drm_intel_bo *dst, drm_intel_bo *src,
- unsigned int width, unsigned int height,
- unsigned int dst_pitch, unsigned int src_pitch)
-{
-   BLIT_COPY_BATCH_START(0);
-   OUT_BATCH((3 << 24) | /* 32 bits */
- (0xcc << 16) | /* copy ROP */
- dst_pitch);
-   OUT_BATCH(0 << 16 | 0);
-   OUT_BATCH(height << 16 | width);
-   OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 
0);
-   OUT_BATCH(0 << 16 | 0);
-   OUT_BATCH(src_pitch);
-   OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
-   ADVANCE_BATCH();
-
-   if (batch->gen >= 6) {
-   BEGIN_BATCH(3, 0);
-   OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
-   OUT_BATCH(0);
-   OUT_BATCH(0);
-   ADVANCE_BATCH();
-   }
-}
-
-static int _emit_dummy_load__bcs(struct test_output *o, int limit, int timeout)
-{
-   int i, ret = 0;
-   drm_intel_bo *src_bo, *dst_bo, *fb_bo;
-   struct igt_fb *fb_info = &o->fb_info[o->current_fb_id];
-
-   igt_require(bufmgr);
-
-   src_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
-   igt_assert(src_bo);
-
-   dst_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
-   igt_assert(dst_bo);
-
-   fb_bo = gem_handle_to_libdrm_bo(bufmgr, drm_fd, "imported", 
fb_info->gem_handle);
-   igt_assert(fb_bo);
-
-   for (i = 0; i < limit; i++) {
-   blit_copy(dst_bo, src_bo,
- 2048, 2048,
- 2048*4, 2048*4);
-
-   igt_swap(src_bo, dst_bo);
-   }
-   blit_copy(fb_bo, src_bo,
- min(o->fb_width, 2048), min(o->fb_height, 2048),
- fb_info->stride, 2048*4);
-   intel_batchbuffer_flush(batch);
-
-   if (timeout > 0)
-   ret = drm_intel_gem_bo_wait(fb_bo, timeout * NSEC_PER_SEC);
-
-   drm_intel_bo_unreference(src_bo);
-   drm_intel_bo_unreference(dst_bo);
-   drm_intel_bo_unreference(fb_bo);
-
-   return ret;
-}
-
-static void emit_dummy_load__bcs(struct test_output *o, int seconds)
-{
-   static int ops_per_sec;
-
-   if (ops_per_sec == 0)
-   ops_per_sec = calibrate_dummy_load(o, "bcs",
-  _emit_dummy_load__bcs);
-
-   _emit_dummy_load__bcs(o, seconds * ops_per_sec, 0);
-}
-
 static void emit_fence_stress(struct test_output *o)
 {
const int num_fences = gem_available_fences(drm_fd);
@@ -334,82 +231,6 @@ static void emit_fence_stress(struct test_output *o)
free(exec);
 }
 
-static int _emit_dummy_load__rcs(struct test_output *o, int limit, int timeout)
-{
-   const struct igt_fb *fb_info = &o->fb_info[o->current_fb_id];
-   igt_render_copyfunc_t copyfunc;
-   struct igt_buf sb[3], *src, *dst, *fb;
-   int i, ret = 0;
-
-   igt_require(bufmgr);
-
-   copyfunc = igt_get_render_copyfunc(devid);
-   if (copyfunc == NULL)
-   return _emit_dummy_load__bcs(o, limit, timeout);
-
-   sb[0].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
-   igt_assert(sb[0].bo);
-   sb[0].size = sb[0].bo->size;
-   sb[0].tiling = I915_TILING_NONE;
-   sb[0].data = NULL;
-   sb[0].num_tiles = sb[0].bo->size;
-   sb[0].stride = 4 * 2048;
-
-   sb[1].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
-   igt_assert(sb[1].bo);
-   sb[1].size = sb[1].bo->size;
-   sb[1].tiling = I915_TILING_NONE;
-   sb[1].data = NULL;
-   sb[1].num_tiles = sb[1].bo->size;
-  

[Intel-gfx] [RFC i-g-t] Extract autotuned dummy load into lib

2016-10-12 Thread Abdiel Janulgue
A lot of igt testcases need some dummy load to make sure a race
window is big enough. Unfortunately having a fixed amount of
workload leads to spurious test failures or overtly long runtimes
on some fast/slow platforms. This library contains functionality
to submit GPU workloads that is dynamically tuned to consume a
specific amount of time.

This functionality is generalized to lib from existing features in
gem_wait and kms_flip. In the future, we could update test cases
that could benefit from auto-tuned dummy workloads to use this
new api.

Abdiel Janulgue (3):
  lib: add igt_dummyload
  igt/gem_wait: Use new igt_dummyload api
  igt/kms_flip: Use new igt_dummyload api

 lib/Makefile.sources |   2 +
 lib/igt.h|   1 +
 lib/igt_dummyload.c  | 419 +++
 lib/igt_dummyload.h  |  63 ++
 tests/gem_wait.c |  77 ++-
 tests/kms_flip.c | 191 +-
 6 files changed, 507 insertions(+), 246 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api

2016-10-12 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_wait.c | 77 +---
 1 file changed, 12 insertions(+), 65 deletions(-)

diff --git a/tests/gem_wait.c b/tests/gem_wait.c
index 461efdb..24a5f5e 100644
--- a/tests/gem_wait.c
+++ b/tests/gem_wait.c
@@ -54,36 +54,6 @@
 #define BUF_PAGES ((8<<20)>>12)
 drm_intel_bo *dst, *dst2;
 
-/* returns time diff in milliseconds */
-static int64_t
-do_time_diff(struct timespec *end, struct timespec *start)
-{
-   int64_t ret;
-   ret = (MSEC_PER_SEC * difftime(end->tv_sec, start->tv_sec)) +
- ((end->tv_nsec/NSEC_PER_MSEC) - (start->tv_nsec/NSEC_PER_MSEC));
-   return ret;
-}
-
-static void blt_color_fill(struct intel_batchbuffer *batch,
-  drm_intel_bo *buf,
-  const unsigned int pages)
-{
-   const unsigned short height = pages/4;
-   const unsigned short width =  4096;
-
-   COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA |
-   XY_COLOR_BLT_WRITE_RGB);
-   OUT_BATCH((3 << 24) | /* 32 Bit Color */
- (0xF0 << 16)  | /* Raster OP copy background register */
- 0); /* Dest pitch is 0 */
-   OUT_BATCH(0);
-   OUT_BATCH(width << 16   |
- height);
-   OUT_RELOC_FENCED(buf, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 
0);
-   OUT_BATCH(rand()); /* random pattern */
-   ADVANCE_BATCH();
-}
-
 static void render_timeout(int fd)
 {
drm_intel_bufmgr *bufmgr;
@@ -91,10 +61,11 @@ static void render_timeout(int fd)
int64_t timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
int64_t negative_timeout = -1;
int ret;
+   const unsigned short height = BUF_PAGES/4;
+   const unsigned short width =  4096;
const bool do_signals = true; /* signals will seem to make the operation
   * use less process CPU time */
-   bool done = false;
-   int i, iter = 1;
+   int iter = 1;
 
igt_skip_on_simulation();
 
@@ -112,30 +83,9 @@ static void render_timeout(int fd)
/* Figure out a rough number of fills required to consume 1 second of
 * GPU work.
 */
-   do {
-   struct timespec start, end;
-   long diff;
-
-#ifndef CLOCK_MONOTONIC_RAW
-#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC
-#endif
-
-   igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &start) == 0);
-   for (i = 0; i < iter; i++)
-   blt_color_fill(batch, dst, BUF_PAGES);
-   intel_batchbuffer_flush(batch);
-   drm_intel_bo_wait_rendering(dst);
-   igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &end) == 0);
-
-   diff = do_time_diff(&end, &start);
-   igt_assert(diff >= 0);
-
-   if ((diff / MSEC_PER_SEC) > ENOUGH_WORK_IN_SECONDS)
-   done = true;
-   else
-   iter <<= 1;
-   } while (!done && iter < 100);
-
+   iter = igt_calibrate_dummy_load(bufmgr, batch, fd,
+   dst->handle, 0,
+   width, height, 1, IGT_DUMMY_BLIT_FILL);
igt_assert_lt(iter, 100);
 
igt_debug("%d iters is enough work\n", iter);
@@ -146,10 +96,9 @@ static void render_timeout(int fd)
/* We should be able to do half as much work in the same amount of time,
 * but because we might schedule almost twice as much as required, we
 * might accidentally time out. Hence add some fudge. */
-   for (i = 0; i < iter/3; i++)
-   blt_color_fill(batch, dst2, BUF_PAGES);
 
-   intel_batchbuffer_flush(batch);
+   igt_emit_dummy_load(bufmgr, batch, fd, dst2->handle, 0,
+   width, height, iter/3, 0, IGT_DUMMY_BLIT_FILL);
igt_assert(gem_bo_busy(fd, dst2->handle) == true);
 
igt_assert_eq(gem_wait(fd, dst2->handle, &timeout), 0);
@@ -168,10 +117,9 @@ static void render_timeout(int fd)
/* Now check that we correctly time out, twice the auto-tune load should
 * be good enough. */
timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC;
-   for (i = 0; i < iter*2; i++)
-   blt_color_fill(batch, dst2, BUF_PAGES);
 
-   intel_batchbuffer_flush(batch);
+   igt_emit_dummy_load(bufmgr, batch, fd, dst2->handle, 0,
+   width, height, iter*2, 0, IGT_DUMMY_BLIT_FILL);
 
ret = gem_wait(fd, dst2->handle, &timeout);
igt_assert_eq(ret, -ETIME);
@@ -186,10 +134,9 @@ static void render_timeout(int fd)
 
/* Now check that we can pass negative (infinite) timeouts. */
negative_timeout = -1;
-   for (i = 0; i < iter; i++)
-   blt_

Re: [Intel-gfx] [PATCH v4] drm/i915: Adjust size of PIPE_CONTROL used for gen8 render seqno write

2016-04-13 Thread Abdiel Janulgue


On 04/12/2016 04:51 PM, Michał Winiarski wrote:
> We started to use PIPE_CONTROL to write render ring seqno in order to
> combat seqno write vs interrupt generation problems. This was introduced
> by commit 7c17d377374d ("drm/i915: Use ordered seqno write interrupt
> generation on gen8+ execlists").
> 
> On gen8+ size of PIPE_CONTROL with Post Sync Operation should be
> 6 dwords. When we're using older 5-dword variant it's possible to
> observe inconsistent values written by PIPE_CONTROL with Post
> Sync Operation from user batches, resulting in rendering corruptions.
> 
> v2: Fix BAT failures
> v3: Comments on alignment and thrashing high dword of seqno (Chris)
> v4: Updated commit msg (Mika)
> 
> Testcase: igt/gem_pipe_control_store_loop/*-qword-write
> Issue: VIZ-7393
> Cc: sta...@vger.kernel.org
> Cc: Chris Wilson 
> Cc: Mika Kuoppala 
> Cc: Abdiel Janulgue 

Tested-by: Abdiel Janulgue 


> Signed-off-by: Michał Winiarski 
> ---
>  drivers/gpu/drm/i915/intel_lrc.c | 10 --
>  1 file changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
> b/drivers/gpu/drm/i915/intel_lrc.c
> index 0d6dc5e..30abe53 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1945,15 +1945,18 @@ static int gen8_emit_request_render(struct 
> drm_i915_gem_request *request)
>   struct intel_ringbuffer *ringbuf = request->ringbuf;
>   int ret;
>  
> - ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS);
> + ret = intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS);
>   if (ret)
>   return ret;
>  
> + /* We're using qword write, seqno should be aligned to 8 bytes. */
> + BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1);
> +
>   /* w/a for post sync ops following a GPGPU operation we
>* need a prior CS_STALL, which is emitted by the flush
>* following the batch.
>*/
> - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5));
> + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6));
>   intel_logical_ring_emit(ringbuf,
>   (PIPE_CONTROL_GLOBAL_GTT_IVB |
>PIPE_CONTROL_CS_STALL |
> @@ -1961,7 +1964,10 @@ static int gen8_emit_request_render(struct 
> drm_i915_gem_request *request)
>   intel_logical_ring_emit(ringbuf, hws_seqno_address(request->engine));
>   intel_logical_ring_emit(ringbuf, 0);
>   intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request));
> + /* We're thrashing one dword of HWS. */
> + intel_logical_ring_emit(ringbuf, 0);
>   intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT);
> + intel_logical_ring_emit(ringbuf, MI_NOOP);
>   return intel_logical_ring_advance_and_submit(request);
>  }
>  
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-07-06 Thread Abdiel Janulgue


On 07/06/2015 11:28 AM, Daniel Vetter wrote:
> On Thu, Jul 02, 2015 at 11:15:40AM +0100, Chris Wilson wrote:
>> On Wed, Jul 01, 2015 at 10:12:23AM +0300, Abdiel Janulgue wrote:
>>> Ensures that the batch buffer is executed by the resource streamer
>>>
>>> v2: Don't skip 1<<15 for the exec flags (Jani Nikula)
>>> v3: Use HAS_RESOURCE_STREAMER macro for execbuf validation (Chris Wilson)
>>>
>>> Testcase: igt/gem_exec_params
>>> Cc: Jani Nikula 
>>> Reviewed-by: Chris Wilson 
>>> Signed-off-by: Abdiel Janulgue 
>>
>> This no longer applies (unrecognised base). Honestly, I would prefer 4&5
>> squashed together, or 4 after 5 so that we do not declare
>> HAS_RESOURCE_STREAMER before we accept the RS execbuf.
>>
>> Minor bit of patch reordering, but the code in 4 looks ok, so
>> Reviewed-by: Chris Wilson  # for 4/5
> 
> Yeah I just reordered 5 to go before 4. Series merged to dinq, thanks.
> Abdiel can you pls push the corresponding igts (or ask Thomas to do it for
> you)?

Thank you! I don't have commit rights in igt, but here is the patch:
http://lists.freedesktop.org/archives/intel-gfx/2015-June/068799.html

-Abdiel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-07-01 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

v2: Don't skip 1<<15 for the exec flags (Jani Nikula)
v3: Use HAS_RESOURCE_STREAMER macro for execbuf validation (Chris Wilson)

Testcase: igt/gem_exec_params
Cc: Jani Nikula 
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++
 include/uapi/drm/i915_drm.h|  7 ++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 600db74..83577c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1490,6 +1490,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!HAS_RESOURCE_STREAMER(dev)) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 and 
above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 51137bd..e7c29f1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -766,7 +766,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v4 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-07-01 Thread Abdiel Janulgue
This will let userspace know whether Resource Streamer is supported
in the kernel.

v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after
I915_PARAM_HAS_GPU_RESET.
v3: Only advertise RS support for hardware that supports it.
v4: Add HAS_RESOURCE_STREAMER() macro (Chris)

Suggested-by: Kenneth Graunke 
Cc: Kenneth Graunke 
Cc: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 drivers/gpu/drm/i915/i915_drv.h | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 3 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c5349fa..a42f165 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = i915.enable_hangcheck &&
intel_has_gpu_reset(dev);
break;
+   case I915_PARAM_HAS_RESOURCE_STREAMER:
+   value = HAS_RESOURCE_STREAMER(dev);
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ea9caf2..9968663 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2520,6 +2520,9 @@ struct drm_i915_cmd_table {
 
 #define HAS_CSR(dev)   (IS_SKYLAKE(dev))
 
+#define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \
+   INTEL_INFO(dev)->gen >= 8)
+
 #define INTEL_PCH_DEVICE_ID_MASK   0xff00
 #define INTEL_PCH_IBX_DEVICE_ID_TYPE   0x3b00
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE   0x1c00
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..51137bd 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
 #define I915_PARAM_HAS_GPU_RESET35
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-06-29 Thread Abdiel Janulgue


On 06/24/2015 09:30 AM, Abdiel Janulgue wrote:
> 
> 
> On 06/16/2015 03:41 PM, Abdiel Janulgue wrote:
>> This will let userspace know whether Resource Streamer is supported
>> in the kernel.
>>
>> v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after
>> I915_PARAM_HAS_GPU_RESET.
>> v3: Only advertise RS support for hardware that supports it.
> 
> Ping. Any status on this one?

Chris?

> 
>>
>> Suggested-by: Kenneth Graunke 
>> Cc: Kenneth Graunke 
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  drivers/gpu/drm/i915/i915_dma.c | 3 +++
>>  include/uapi/drm/i915_drm.h | 1 +
>>  2 files changed, 4 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_dma.c 
>> b/drivers/gpu/drm/i915/i915_dma.c
>> index 88795d2..4f55f51 100644
>> --- a/drivers/gpu/drm/i915/i915_dma.c
>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>> @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void 
>> *data,
>>  i915.reset &&
>>  intel_has_gpu_reset(dev);
>>  break;
>> +case I915_PARAM_HAS_RESOURCE_STREAMER:
>> +value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8;
>> +   break;
>>  default:
>>  DRM_DEBUG("Unknown parameter %d\n", param->param);
>>  return -EINVAL;
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index f88cc1c..51137bd 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
>>  #define I915_PARAM_SUBSLICE_TOTAL33
>>  #define I915_PARAM_EU_TOTAL  34
>>  #define I915_PARAM_HAS_GPU_RESET 35
>> +#define I915_PARAM_HAS_RESOURCE_STREAMER 36
>>  
>>  typedef struct drm_i915_getparam {
>>  int param;
>>
> ___
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-06-23 Thread Abdiel Janulgue


On 06/16/2015 03:41 PM, Abdiel Janulgue wrote:
> This will let userspace know whether Resource Streamer is supported
> in the kernel.
> 
> v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after
> I915_PARAM_HAS_GPU_RESET.
> v3: Only advertise RS support for hardware that supports it.

Ping. Any status on this one?

> 
> Suggested-by: Kenneth Graunke 
> Cc: Kenneth Graunke 
> Signed-off-by: Abdiel Janulgue 
> ---
>  drivers/gpu/drm/i915/i915_dma.c | 3 +++
>  include/uapi/drm/i915_drm.h | 1 +
>  2 files changed, 4 insertions(+)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 88795d2..4f55f51 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void 
> *data,
>   i915.reset &&
>   intel_has_gpu_reset(dev);
>   break;
> + case I915_PARAM_HAS_RESOURCE_STREAMER:
> + value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8;
> +   break;
>   default:
>   DRM_DEBUG("Unknown parameter %d\n", param->param);
>   return -EINVAL;
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index f88cc1c..51137bd 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_SUBSLICE_TOTAL 33
>  #define I915_PARAM_EU_TOTAL   34
>  #define I915_PARAM_HAS_GPU_RESET  35
> +#define I915_PARAM_HAS_RESOURCE_STREAMER 36
>  
>  typedef struct drm_i915_getparam {
>   int param;
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-06-16 Thread Abdiel Janulgue
This will let userspace know whether Resource Streamer is supported
in the kernel.

v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after
I915_PARAM_HAS_GPU_RESET.
v3: Only advertise RS support for hardware that supports it.

Suggested-by: Kenneth Graunke 
Cc: Kenneth Graunke 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 88795d2..4f55f51 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
i915.reset &&
intel_has_gpu_reset(dev);
break;
+   case I915_PARAM_HAS_RESOURCE_STREAMER:
+   value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..51137bd 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
 #define I915_PARAM_HAS_GPU_RESET35
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-06-16 Thread Abdiel Janulgue
This will let userspace know whether Resource Streamer is supported
in the kernel.

v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after
I915_PARAM_HAS_GPU_RESET.

Suggested-by: Kenneth Graunke 
Cc: Kenneth Graunke 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 88795d2..e7ad30f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
i915.reset &&
intel_has_gpu_reset(dev);
break;
+   case I915_PARAM_HAS_RESOURCE_STREAMER:
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..51137bd 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
 #define I915_PARAM_HAS_GPU_RESET35
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-06-16 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

v2: Don't skip 1<<15 for the exec flags (Jani Nikula)

Testcase: igt/gem_exec_params
Cc: Jani Nikula 
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++
 include/uapi/drm/i915_drm.h|  7 ++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..ae064f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1485,6 +1485,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 and 
above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 40ab1c8..debccd34 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -761,7 +761,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam

2015-06-16 Thread Abdiel Janulgue
This will let userspace know whether Resource Streamer is supported
in the kernel.

Suggested-by: Kenneth Graunke 
Cc: Kenneth Graunke 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_dma.c | 3 +++
 include/uapi/drm/i915_drm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 68e0c85..10d4ead 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -163,6 +163,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
if (!value)
return -ENODEV;
break;
+   case I915_PARAM_HAS_RESOURCE_STREAMER:
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..40ab1c8 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_REVISION  32
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
+#define I915_PARAM_HAS_RESOURCE_STREAMER 35
 
 typedef struct drm_i915_getparam {
int param;
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 3/5] drm/i915: Enable resource streamer on Execlists

2015-06-16 Thread Abdiel Janulgue
GEN8 and above uses Execlists by default instead of the legacy
ringbuffer for batch execution. This patch enables the resource
streamer bits when required.

Patch is based on the initial work by Minu Mathai 
This version also adds the required bits to enable GEN8 Resource
Streamer context save and restore for Execlists.

Cc: ville.syrj...@linux.intel.com
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/intel_lrc.c | 8 ++--
 drivers/gpu/drm/i915/intel_lrc.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..b015e96 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1172,7 +1172,10 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
*ringbuf,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8));
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
+   (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ?
+MI_BATCH_RESOURCE_STREAMER : 0));
intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
intel_logical_ring_emit(ringbuf, MI_NOOP);
@@ -1726,7 +1729,8 @@ populate_lr_context(struct intel_context *ctx, struct 
drm_i915_gem_object *ctx_o
reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
reg_state[CTX_CONTEXT_CONTROL+1] =
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+  CTX_CTRL_RS_CTX_ENABLE);
reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
reg_state[CTX_RING_HEAD+1] = 0;
reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index adb731e4..de6087a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -32,6 +32,7 @@
 #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244)
 #define  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH   (1 << 3)
 #define  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   (1 << 0)
+#define   CTX_CTRL_RS_CTX_ENABLE(1 << 1)
 #define RING_CONTEXT_STATUS_BUF(ring)  ((ring)->mmio_base+0x370)
 #define RING_CONTEXT_STATUS_PTR(ring)  ((ring)->mmio_base+0x3a0)
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 2/5] drm/i915: Enable Resource Streamer state save/restore on MI_SET_CONTEXT

2015-06-16 Thread Abdiel Janulgue
Also clarify comments on context size that the extra state for
Resource Streamer is included.

v2: Don't remove the extended save/restore enabled for older
platforms. (Ville)
Use new MI_SET_CONTEXT defines for HSW RS save/restore state
instead of extended save/restore. (Daniel)

Suggested-by: Chris Wilson 
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 4 +++-
 drivers/gpu/drm/i915/i915_reg.h | 5 -
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..a29dbcd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring,
}
 
/* These flags are for resource streamer on HSW+ */
-   if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
+   if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8)
+   flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
+   else if (INTEL_INFO(ring->dev)->gen < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 238bb25..2b1321d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -316,6 +316,8 @@
 #define   MI_RESTORE_EXT_STATE_EN  (1<<2)
 #define   MI_FORCE_RESTORE (1<<1)
 #define   MI_RESTORE_INHIBIT   (1<<0)
+#define   HSW_MI_RS_SAVE_STATE_EN   (1<<3)
+#define   HSW_MI_RS_RESTORE_STATE_EN(1<<2)
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
@@ -2498,7 +2500,8 @@ enum skl_disp_power_wells {
  * valid. Now, docs explain in dwords what is in the context object. The full
  * size is 70720 bytes, however, the power context and execlist context will
  * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages.
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  */
 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
 /* Same as Haswell, but 72064 bytes now. */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 1/5] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-06-16 Thread Abdiel Janulgue
Adds support for enabling the resource streamer on the legacy
ringbuffer for HSW and GEN8.

Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..715cb2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ?
+MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ?
+MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t v3] tests/gem_exec_params: check invalid flags for Resource Streamer

2015-06-16 Thread Abdiel Janulgue
Make sure resource streamer flags works only in correct ring in
addition to checking next flag after the RS boundary fails.

v2: Make sure we reject RS on pre-hsw.
v3: Don't skip 1<<15 for the exec flags (Jani Nikula)

Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_exec_params.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 54f0dc3..e9c13a4 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -48,6 +48,7 @@
 #define LOCAL_I915_EXEC_BSD_MASK (3<<13)
 #define LOCAL_I915_EXEC_BSD_RING1 (1<<13)
 #define LOCAL_I915_EXEC_BSD_RING2 (2<<13)
+#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<15)
 
 struct drm_i915_gem_execbuffer2 execbuf;
 struct drm_i915_gem_exec_object2 gem_exec[1];
@@ -220,7 +221,7 @@ igt_main
/* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle 
*/
 
igt_subtest("invalid-flag") {
-   execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1);
+   execbuf.flags = I915_EXEC_RENDER | 
(LOCAL_I915_EXEC_RESOURCE_STREAMER << 1);
RUN_FAIL(EINVAL);
}
 
@@ -234,6 +235,30 @@ igt_main
execbuf.num_cliprects = 0;
}
 
+   igt_subtest("rs-invalid-on-bsd-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BSD | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-blt-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BLT | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-vebox-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_VEBOX | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-gen") {
+   igt_require(!IS_HASWELL(devid) && intel_gen(devid) < 8);
+   execbuf.flags = I915_EXEC_RENDER | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
 #define DIRT(name) \
igt_subtest(#name "-dirt") { \
execbuf.flags = 0; \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-06-08 Thread Abdiel Janulgue


On 06/08/2015 07:10 PM, Ville Syrjälä wrote:
> On Mon, Jun 08, 2015 at 01:04:07PM +0300, Abdiel Janulgue wrote:
>> Adds support for executing the resource streamer on BDW and HSW
>>
>> v2: Add support for Execlists (Minu Mathai )
>>
>> Reviewed-by: Chris Wilson 
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  drivers/gpu/drm/i915/i915_reg.h | 1 +
>>  drivers/gpu/drm/i915/intel_lrc.c| 4 +++-
>>  drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++--
>>  drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
>>  4 files changed, 11 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h 
>> b/drivers/gpu/drm/i915/i915_reg.h
>> index b522eb6..238bb25 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -356,6 +356,7 @@
>>  #define MI_BATCH_BUFFER_START   MI_INSTR(0x31, 0)
>>  #define   MI_BATCH_GTT  (2<<6) /* aliased with (1<<7) on 
>> gen4 */
>>  #define MI_BATCH_BUFFER_START_GEN8  MI_INSTR(0x31, 1)
>> +#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
>>  
>>  #define MI_PREDICATE_SRC0   (0x2400)
>>  #define MI_PREDICATE_SRC1   (0x2408)
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index fcb074b..3b168f6 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -1172,7 +1172,9 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
>> *ringbuf,
>>  return ret;
>>  
>>  /* FIXME(BDW): Address space and security selectors. */
>> -intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
>> (ppgtt<<8));
>> +intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
>> +(ppgtt<<8) | (I915_DISPATCH_RS ?
> 
> That doesn't look right.

Yay.. Didn't catch these since this path never gets executed under GEN8
anyway which uses execlist not legacy batch buffer execution. Better
remove this then.
> 
>> +  MI_BATCH_RESOURCE_STREAMER : 0));
>>  intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
>>  intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
>>  intel_logical_ring_emit(ringbuf, MI_NOOP);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 441e250..715cb2a 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
>> *ring,
>>  return ret;
>>  
>>  /* FIXME(BDW): Address space and security selectors. */
>> -intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
>> +intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
>> +(dispatch_flags & I915_DISPATCH_RS ?
>> + MI_BATCH_RESOURCE_STREAMER : 0));
>>  intel_ring_emit(ring, lower_32_bits(offset));
>>  intel_ring_emit(ring, upper_32_bits(offset));
>>  intel_ring_emit(ring, MI_NOOP);
>> @@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs 
>> *ring,
>>  intel_ring_emit(ring,
>>  MI_BATCH_BUFFER_START |
>>  (dispatch_flags & I915_DISPATCH_SECURE ?
>> - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
>> + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
>> +(dispatch_flags & I915_DISPATCH_RS ?
>> + MI_BATCH_RESOURCE_STREAMER : 0));
>>  /* bit0-7 is the length on GEN6+ */
>>  intel_ring_emit(ring, offset);
>>  intel_ring_advance(ring);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index c761fe0..3521bc0 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -167,6 +167,7 @@ struct  intel_engine_cs {
>> unsigned dispatch_flags);
>>  #define I915_DISPATCH_SECURE 0x1
>>  #define I915_DISPATCH_PINNED 0x2
>> +#define I915_DISPATCH_RS 0x4
>>  void(*cleanup)(struct intel_engine_cs *ring);
>>  
>>  /* GEN8 signal/wait table - never trust comments!
>> -- 
>> 1.9.1
>>
>> ___
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v3

2015-06-08 Thread Abdiel Janulgue

terribly sorry for the typos:

On 06/08/2015 01:04 PM, Abdiel Janulgue wrote:
> 
> Although most of this patches were alread reviewed, I am resending them

*already

> due to additional changes suggested by Jani Nikula. In addition
> Mesa folks want RS to be working with hiccups on GEN8 as well so

*without

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 3/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-06-08 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

v2: Don't skip 1<<15 for the exec flags (Jani Nikula).
Add I915_PARAM_HAS_RESOURCE_STREAMER to check if kernel has RS support
baked in (Kenneth Graunke).

Testcase: igt/gem_exec_params
Cc: Jani Nikula 
Cc: kenn...@whitecape.org
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_dma.c|  3 +++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++
 include/uapi/drm/i915_drm.h|  8 +++-
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 68e0c85..10d4ead 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -163,6 +163,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
if (!value)
return -ENODEV;
break;
+   case I915_PARAM_HAS_RESOURCE_STREAMER:
+   value = 1;
+   break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..ae064f0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1485,6 +1485,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 and 
above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..debccd34 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_REVISION  32
 #define I915_PARAM_SUBSLICE_TOTAL   33
 #define I915_PARAM_EU_TOTAL 34
+#define I915_PARAM_HAS_RESOURCE_STREAMER 35
 
 typedef struct drm_i915_getparam {
int param;
@@ -760,7 +761,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/3] drm/i915: Enable Resource Streamer state save/restore

2015-06-08 Thread Abdiel Janulgue
Also clarify comments on context size that the extra state for
Resource Streamer is included.

v2: Don't remove the extended save/restore enabled for older
platforms. (Ville)
Use new MI_SET_CONTEXT defines for HSW RS save/restore state
instead of extended save/restore. (Daniel)
v3: Add RS save restore for GEN8

Cc: ville.syrj...@linux.intel.com
Suggested-by: Chris Wilson 
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 4 +++-
 drivers/gpu/drm/i915/i915_reg.h | 5 -
 drivers/gpu/drm/i915/intel_lrc.c| 3 ++-
 drivers/gpu/drm/i915/intel_lrc.h| 1 +
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..a29dbcd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring,
}
 
/* These flags are for resource streamer on HSW+ */
-   if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
+   if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8)
+   flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
+   else if (INTEL_INFO(ring->dev)->gen < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 238bb25..2b1321d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -316,6 +316,8 @@
 #define   MI_RESTORE_EXT_STATE_EN  (1<<2)
 #define   MI_FORCE_RESTORE (1<<1)
 #define   MI_RESTORE_INHIBIT   (1<<0)
+#define   HSW_MI_RS_SAVE_STATE_EN   (1<<3)
+#define   HSW_MI_RS_RESTORE_STATE_EN(1<<2)
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
@@ -2498,7 +2500,8 @@ enum skl_disp_power_wells {
  * valid. Now, docs explain in dwords what is in the context object. The full
  * size is 70720 bytes, however, the power context and execlist context will
  * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages.
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  */
 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
 /* Same as Haswell, but 72064 bytes now. */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 3b168f6..50af986 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1728,7 +1728,8 @@ populate_lr_context(struct intel_context *ctx, struct 
drm_i915_gem_object *ctx_o
reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring);
reg_state[CTX_CONTEXT_CONTROL+1] =
_MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH |
-   CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT |
+  CTX_CTRL_RS_CTX_ENABLE);
reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base);
reg_state[CTX_RING_HEAD+1] = 0;
reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base);
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index adb731e4..de6087a 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -32,6 +32,7 @@
 #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244)
 #define  CTX_CTRL_INHIBIT_SYN_CTX_SWITCH   (1 << 3)
 #define  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   (1 << 0)
+#define   CTX_CTRL_RS_CTX_ENABLE(1 << 1)
 #define RING_CONTEXT_STATUS_BUF(ring)  ((ring)->mmio_base+0x370)
 #define RING_CONTEXT_STATUS_PTR(ring)  ((ring)->mmio_base+0x3a0)
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v3

2015-06-08 Thread Abdiel Janulgue
Hi,

Although most of this patches were alread reviewed, I am resending them
due to additional changes suggested by Jani Nikula. In addition
Mesa folks want RS to be working with hiccups on GEN8 as well so
I added the necessary support for that platform as well.

Changes since last posting:
- Make RS context save and restore work properly on GEN8
- Add I915_PARAM_HAS_RESOURCE_STREAMER params.

[PATCH 1/3] drm/i915: Enable resource streamer bits on
[PATCH 2/3] drm/i915: Enable Resource Streamer state save/restore
[PATCH 3/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-06-08 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW and HSW

v2: Add support for Execlists (Minu Mathai )

Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_lrc.c| 4 +++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++--
 drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
 4 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..3b168f6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1172,7 +1172,9 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
*ringbuf,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8));
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 |
+   (ppgtt<<8) | (I915_DISPATCH_RS ?
+ MI_BATCH_RESOURCE_STREAMER : 0));
intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
intel_logical_ring_emit(ringbuf, MI_NOOP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..715cb2a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ?
+MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ?
+MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 2/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-05-19 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW and HSW

v2: Add support for Execlists (Minu Mathai )

Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_lrc.c| 3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 --
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..d523494 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
*ringbuf,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8));
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8) |
+   (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER 
: 0));
intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
intel_logical_ring_emit(ringbuf, MI_NOOP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..9045144 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-19 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

Testcase: igt/gem_exec_params
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..8a0abbb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
+ "and above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..a4c1a5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH resend 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW

2015-05-19 Thread Abdiel Janulgue
Also clarify comments on context size that the extra state for
Resource Streamer is included.

v2: Don't remove the extended save/restore enabled for older
platforms. (Ville)
Use new MI_SET_CONTEXT defines for HSW RS save/restore state
instead of extended save/restore. (Daniel)

Suggested-by: Chris Wilson 
Reviewed-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 4 +++-
 drivers/gpu/drm/i915/i915_reg.h | 5 -
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..1a521dd 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring,
}
 
/* These flags are for resource streamer on HSW+ */
-   if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
+   if (IS_HASWELL(ring->dev))
+   flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
+   else if (INTEL_INFO(ring->dev)->gen < 8)
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 238bb25..2b1321d 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -316,6 +316,8 @@
 #define   MI_RESTORE_EXT_STATE_EN  (1<<2)
 #define   MI_FORCE_RESTORE (1<<1)
 #define   MI_RESTORE_INHIBIT   (1<<0)
+#define   HSW_MI_RS_SAVE_STATE_EN   (1<<3)
+#define   HSW_MI_RS_RESTORE_STATE_EN(1<<2)
 #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */
 #define   MI_SEMAPHORE_TARGET(engine)  ((engine)<<15)
 #define MI_SEMAPHORE_WAIT  MI_INSTR(0x1c, 2) /* GEN8+ */
@@ -2498,7 +2500,8 @@ enum skl_disp_power_wells {
  * valid. Now, docs explain in dwords what is in the context object. The full
  * size is 70720 bytes, however, the power context and execlist context will
  * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages.
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  */
 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
 /* Same as Haswell, but 72064 bytes now. */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v2] tests/gem_exec_params: check invalid flags for Resource Streamer

2015-05-19 Thread Abdiel Janulgue
Make sure resource streamer flags works only in correct ring in
addition to checking next flag after the RS boundary fails.

v2: Make sure we reject RS on pre-hsw.

Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_exec_params.c | 27 ++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 54f0dc3..f374226 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -48,6 +48,7 @@
 #define LOCAL_I915_EXEC_BSD_MASK (3<<13)
 #define LOCAL_I915_EXEC_BSD_RING1 (1<<13)
 #define LOCAL_I915_EXEC_BSD_RING2 (2<<13)
+#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<16)
 
 struct drm_i915_gem_execbuffer2 execbuf;
 struct drm_i915_gem_exec_object2 gem_exec[1];
@@ -220,7 +221,7 @@ igt_main
/* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle 
*/
 
igt_subtest("invalid-flag") {
-   execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1);
+   execbuf.flags = I915_EXEC_RENDER | 
(LOCAL_I915_EXEC_RESOURCE_STREAMER << 1);
RUN_FAIL(EINVAL);
}
 
@@ -234,6 +235,30 @@ igt_main
execbuf.num_cliprects = 0;
}
 
+   igt_subtest("rs-invalid-on-bsd-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BSD | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-blt-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BLT | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-vebox-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_VEBOX | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-gen") {
+   igt_require(!IS_HASWELL(devid) && intel_gen(devid) < 8);
+   execbuf.flags = I915_EXEC_RENDER | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
 #define DIRT(name) \
igt_subtest(#name "-dirt") { \
execbuf.flags = 0; \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-19 Thread Abdiel Janulgue


On 05/18/2015 05:55 PM, Chris Wilson wrote:
> On Mon, May 18, 2015 at 11:31:54AM +0300, Abdiel Janulgue wrote:
>> Ensures that the batch buffer is executed by the resource streamer
>>
>> Signed-off-by: Abdiel Janulgue 
> 
> 1-3:
> Reviewed-by: Chris Wilson 
> 
> Now all you have to do is satisfy Daniel with a few igt.
> -Chris
> 

Thanks for the review! :)
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW

2015-05-19 Thread Abdiel Janulgue


On 05/19/2015 11:26 AM, Daniel Vetter wrote:
> On Tue, May 19, 2015 at 09:58:52AM +0300, Abdiel Janulgue wrote:
>>
>>
>> On 05/18/2015 07:07 PM, Ville Syrjälä wrote:
>>> On Mon, May 18, 2015 at 04:41:51PM +0100, Chris Wilson wrote:
>>>> On Mon, May 18, 2015 at 06:36:18PM +0300, Ville Syrjälä wrote:
>>>>> On Mon, May 18, 2015 at 11:31:56AM +0300, Abdiel Janulgue wrote:
>>>>>> Also clarify comments on context size that the extra state for
>>>>>> Resource Streamer is included.
>>>>>>
>>>>>> Suggested-by: Chris Wilson 
>>>>>> Signed-off-by: Abdiel Janulgue 
>>>>>> ---
>>>>>>  drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
>>>>>>  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
>>>>>>  2 files changed, 3 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>>>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>>>>> index f3e84c4..1db107a 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>>>>> @@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring,
>>>>>>  }
>>>>>>  
>>>>>>  /* These flags are for resource streamer on HSW+ */
>>>>>> -if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>>>>>> +if (IS_HASWELL(ring->dev))
>>>>>>  flags |= (MI_SAVE_EXT_STATE_EN | 
>>>>>> MI_RESTORE_EXT_STATE_EN);
>>>>>
>>>>> I don't get it. Previously we told the hardware to save the extended
>>>>> context on !hsw, and now we don't. That doesn't seem correct to me.
>>>>
>>>> We don't use the extended state elsewhere.
>>>
>>> Umm. On SNB at least 3DSTATE_POLY_STIPPLE_PATTERN seems to be part of
>>> the extended state, and on IVB/VLV SOL state is there. Mesa uses all of
>>> that.
>>>
>>>> I'd always been dubious of
>>>> the origins/intentions of this line of code since it claims only to be
>>>> for enabling RS on HSW...
>>>>
>>>> i.e. commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2
>>>> Author: Ben Widawsky 
>>>> Date:   Mon Aug 18 10:35:28 2014 -0700
>>>>
>>>> drm/i915: Don't save/restore RS when not used
>>>>  
>>>> was backwards.
>>>
>>> ? It did exactly what it said, ie. avoid setting the RS save/restore
>>> bits on HSW+ while leaving the ext save/restore enabled on older
>>> platforms.
>>>
>>
>> Another option is to enable extended state save restore for both HSW and
>> the older platforms so we would get both features (RS save/restore and
>> Extended State Save enable)?
>>
>> Seems the reason for this confusion is that the we have the exact same
>> bit positions in MI_SET_CONTEXT but it got renamed starting from HSW and up.
> 
> If the bit has been renamed it might be good to add at least a new #define
> with a _HSw suffix and better name, just to make it clear what's going on.
> gcc will see through this and fold down the different conditions to just
> one.

I'll do that in the next revision and also add the igt tag you require.
In the meantime, please check the igt approach I sent is the correct one.

Thanks!
-Abdiel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH] tests/gem_exec_params: check invalid flags for Resource Streamer

2015-05-19 Thread Abdiel Janulgue
Make sure resource streamer flags works only in correct ring in
addition to checking next flag after the RS boundary fails.

Cc: Daniel Vetter 
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_exec_params.c | 21 -
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 54f0dc3..08ee330 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -48,6 +48,7 @@
 #define LOCAL_I915_EXEC_BSD_MASK (3<<13)
 #define LOCAL_I915_EXEC_BSD_RING1 (1<<13)
 #define LOCAL_I915_EXEC_BSD_RING2 (2<<13)
+#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<16)
 
 struct drm_i915_gem_execbuffer2 execbuf;
 struct drm_i915_gem_exec_object2 gem_exec[1];
@@ -220,7 +221,7 @@ igt_main
/* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle 
*/
 
igt_subtest("invalid-flag") {
-   execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1);
+   execbuf.flags = I915_EXEC_RENDER | 
(LOCAL_I915_EXEC_RESOURCE_STREAMER << 1);
RUN_FAIL(EINVAL);
}
 
@@ -234,6 +235,24 @@ igt_main
execbuf.num_cliprects = 0;
}
 
+   igt_subtest("rs-invalid-on-bsd-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BSD | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-blt-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_BLT | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
+   igt_subtest("rs-invalid-on-vebox-ring") {
+   igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8);
+   execbuf.flags = I915_EXEC_VEBOX | 
LOCAL_I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
 #define DIRT(name) \
igt_subtest(#name "-dirt") { \
execbuf.flags = 0; \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW

2015-05-18 Thread Abdiel Janulgue


On 05/18/2015 07:07 PM, Ville Syrjälä wrote:
> On Mon, May 18, 2015 at 04:41:51PM +0100, Chris Wilson wrote:
>> On Mon, May 18, 2015 at 06:36:18PM +0300, Ville Syrjälä wrote:
>>> On Mon, May 18, 2015 at 11:31:56AM +0300, Abdiel Janulgue wrote:
>>>> Also clarify comments on context size that the extra state for
>>>> Resource Streamer is included.
>>>>
>>>> Suggested-by: Chris Wilson 
>>>> Signed-off-by: Abdiel Janulgue 
>>>> ---
>>>>  drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
>>>>  drivers/gpu/drm/i915/i915_reg.h | 3 ++-
>>>>  2 files changed, 3 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
>>>> b/drivers/gpu/drm/i915/i915_gem_context.c
>>>> index f3e84c4..1db107a 100644
>>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c
>>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c
>>>> @@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring,
>>>>}
>>>>  
>>>>/* These flags are for resource streamer on HSW+ */
>>>> -  if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>>>> +  if (IS_HASWELL(ring->dev))
>>>>flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
>>>
>>> I don't get it. Previously we told the hardware to save the extended
>>> context on !hsw, and now we don't. That doesn't seem correct to me.
>>
>> We don't use the extended state elsewhere.
> 
> Umm. On SNB at least 3DSTATE_POLY_STIPPLE_PATTERN seems to be part of
> the extended state, and on IVB/VLV SOL state is there. Mesa uses all of
> that.
> 
>> I'd always been dubious of
>> the origins/intentions of this line of code since it claims only to be
>> for enabling RS on HSW...
>>
>> i.e. commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2
>> Author: Ben Widawsky 
>> Date:   Mon Aug 18 10:35:28 2014 -0700
>>
>> drm/i915: Don't save/restore RS when not used
>>  
>> was backwards.
> 
> ? It did exactly what it said, ie. avoid setting the RS save/restore
> bits on HSW+ while leaving the ext save/restore enabled on older
> platforms.
> 

Another option is to enable extended state save restore for both HSW and
the older platforms so we would get both features (RS save/restore and
Extended State Save enable)?

Seems the reason for this confusion is that the we have the exact same
bit positions in MI_SET_CONTEXT but it got renamed starting from HSW and up.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH i-g-t] tests/gem_exec_params: check invalid flags for Resource Streamer

2015-05-18 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 tests/gem_exec_params.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 54f0dc3..fd9d7bd 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -220,7 +220,7 @@ igt_main
/* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle 
*/
 
igt_subtest("invalid-flag") {
-   execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1);
+   execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_RESOURCE_STREAMER 
<< 1);
RUN_FAIL(EINVAL);
}
 
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-18 Thread Abdiel Janulgue


On 05/18/2015 12:01 PM, Daniel Vetter wrote:
> On Mon, May 18, 2015 at 11:31:54AM +0300, Abdiel Janulgue wrote:
>> Ensures that the batch buffer is executed by the resource streamer
>>
>> Signed-off-by: Abdiel Janulgue 
> 
> Maybe I missed them, but we also need a patch to update gem_exec_params
> from igt.  At least the invalid-flag subtest should fail with this
> applied. Also please add a Testcase: tag once you've added the testcase
> for this new flag.

I'm not sure what you mean here. When I run unmodified gem_exec_params
with this resource streamer patches applied. I get this results, at
least on the invalid-flag section:

Subtest invalid-flag: SUCCESS (0.000s)

Is this supposed to fail?

-abdiel

> 
>> ---
>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
>>  drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
>>  include/uapi/drm/i915_drm.h|  7 ++-
>>  3 files changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index a3190e79..8a0abbb 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
>> *data,
>>  return -EINVAL;
>>  }
>>  
>> +if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
>> +if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
>> +DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
>> +  "and above\n");
>> +return -EINVAL;
>> +}
>> +if (ring->id != RCS) {
>> +DRM_DEBUG("RS is not available on %s\n",
>> + ring->name);
>> +return -EINVAL;
>> +}
>> +
>> +dispatch_flags |= I915_DISPATCH_RS;
>> +}
>> +
>>  intel_runtime_pm_get(dev_priv);
>>  
>>  ret = i915_mutex_lock_interruptible(dev);
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index c761fe0..3521bc0 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -167,6 +167,7 @@ struct  intel_engine_cs {
>> unsigned dispatch_flags);
>>  #define I915_DISPATCH_SECURE 0x1
>>  #define I915_DISPATCH_PINNED 0x2
>> +#define I915_DISPATCH_RS 0x4
>>  void(*cleanup)(struct intel_engine_cs *ring);
>>  
>>  /* GEN8 signal/wait table - never trust comments!
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 551b673..a4c1a5c 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 {
>>  #define I915_EXEC_BSD_RING1 (1<<13)
>>  #define I915_EXEC_BSD_RING2 (2<<13)
>>  
>> -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
>> +/** Tell the kernel that the batchbuffer is processed by
>> + *  the resource streamer.
>> + */
>> +#define I915_EXEC_RESOURCE_STREAMER (1<<16)
>> +
>> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
>>  
>>  #define I915_EXEC_CONTEXT_ID_MASK   (0x)
>>  #define i915_execbuffer2_set_context_id(eb2, context) \
>> -- 
>> 1.9.1
>>
>> ___
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 2/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-05-18 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW and HSW

v2: Add support for Execlists (Minu Mathai )

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_lrc.c| 3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 --
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..d523494 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
*ringbuf,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8));
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8) |
+   (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER 
: 0));
intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
intel_logical_ring_emit(ringbuf, MI_NOOP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..9045144 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW

2015-05-18 Thread Abdiel Janulgue
Also clarify comments on context size that the extra state for
Resource Streamer is included.

Suggested-by: Chris Wilson 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_context.c | 2 +-
 drivers/gpu/drm/i915/i915_reg.h | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c 
b/drivers/gpu/drm/i915/i915_gem_context.c
index f3e84c4..1db107a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring,
}
 
/* These flags are for resource streamer on HSW+ */
-   if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
+   if (IS_HASWELL(ring->dev))
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 238bb25..3db0596 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2498,7 +2498,8 @@ enum skl_disp_power_wells {
  * valid. Now, docs explain in dwords what is in the context object. The full
  * size is 70720 bytes, however, the power context and execlist context will
  * never be saved (power context is stored elsewhere, and execlists don't work
- * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages.
+ * on HSW) - so the final size, including the extra state required for the
+ * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
  */
 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE)
 /* Same as Haswell, but 72064 bytes now. */
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-18 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..8a0abbb 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
+ "and above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..a4c1a5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-05-15 Thread Abdiel Janulgue
Hi,

On 05/13/2015 01:22 PM, Chris Wilson wrote:
> On Wed, May 13, 2015 at 11:13:24AM +0300, Abdiel Janulgue wrote:
>> Adds support for executing the resource streamer on BDW and HSW
>>
>> v2: Add support for Execlists (Minu Mathai )
>>
>> Signed-off-by: Abdiel Janulgue 
> 
> I would have liked to have seen the comments for HSW_CTX_TOTAL_SIZE
> updated to include the resource streamer requirements.

Comment block mention HSW_CTX_TOTAL_SIZE as 70720 bytes which already
implicitly include RS dwords.
I can add "70720 bytes include resource streamer dwords" to the comments
or do you want me to break this down further still?

> 
> Also
> 
> /* These flags are for resource streamer on HSW+ */
> if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
>   flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
> 
> implies to me that we should be setting something for hsw to
> save/restore RS that we do not currently. So either the comment needs
> fixing, or we have a lack of code.

I checked, the current code does disable context save and restore for RS
in HSW (commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2)

But I've now modified this to:

if (IS_HASWELL(ring->dev))
flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);


so that MI_SET_CONTEXT includes RS context on HSW.

Anyway, this seems to work perfectly fine in HSW as well. I'll include
that in the next version.

> 
> Note that intel_lrc.c has duplicate functions for gen8. :|
> -Chris
> 
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-13 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..f937ad0 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
return -EINVAL;
}
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
+ "and above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s)\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
intel_runtime_pm_get(dev_priv);
 
ret = i915_mutex_lock_interruptible(dev);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..a4c1a5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-05-13 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW and HSW

v2: Add support for Execlists (Minu Mathai )

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_lrc.c| 3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 --
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index fcb074b..d523494 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer 
*ringbuf,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8));
+   intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | 
(ppgtt<<8) |
+   (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER 
: 0));
intel_logical_ring_emit(ringbuf, lower_32_bits(offset));
intel_logical_ring_emit(ringbuf, upper_32_bits(offset));
intel_logical_ring_emit(ringbuf, MI_NOOP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..9045144 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v2

2015-05-13 Thread Abdiel Janulgue
Changes since initial posting:

- Don't split the conversion from args->flags to ring from its
  subsequent EINVAL check (Chris Wilson )
- Execlists support (Minu Mathai )

--
Abdiel Janulgue (2):
  drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
  drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 drivers/gpu/drm/i915/intel_lrc.c   |  3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.c|  6 --
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 6 files changed, 29 insertions(+), 4 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-13 Thread Abdiel Janulgue


On 05/12/2015 12:49 PM, Chris Wilson wrote:
> On Mon, May 11, 2015 at 12:01:11PM +0300, Abdiel Janulgue wrote:
>> Ensures that the batch buffer is executed by the resource streamer
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
>>  drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
>>  include/uapi/drm/i915_drm.h|  7 ++-
>>  3 files changed, 22 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
>> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> index a3190e79..afbd3c16 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
>> @@ -1474,6 +1474,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
>> *data,
>>  } else
>>  ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
>>  
>> +if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
>> +if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
>> +DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
>> +  "and above\n");
>> +return -EINVAL;
>> +}
>> +if (ring->id != RCS) {
>> +DRM_DEBUG("RS is not available on %s)\n",
>> + ring->name);
>> +return -EINVAL;
>> +}
>> +
>> +dispatch_flags |= I915_DISPATCH_RS;
>> +}
>> +
>>  if (!intel_ring_initialized(ring)) {
> 
> Please don't split the conversion from args->flags to ring from its
> subsequent EINVAL check.
> -Chris
> 

I'll include this change in the next version.
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer

2015-05-12 Thread Abdiel Janulgue


On 05/11/2015 12:26 PM, Chris Wilson wrote:
> On Mon, May 11, 2015 at 12:01:10PM +0300, Abdiel Janulgue wrote:
>> This is a re-spin of my resource streamer patchset from a year ago.
>> The resource streamer is a hw-feature that helps in reducing commands
>> submitted by the CPU.
> 
> Did you check that the contexts we allocate are large enough to hold the
> extra RS state? I can't remember if the size we settled on was
> sufficient or not...
> -Chris
> 

I checked HSW_CXT_TOTAL_SIZE and GEN8_CXT_TOTAL_SIZE to the values in
Bspec, the sizes does indeed contain the resource streamer dwords. So I
guess the answer is it's sufficient.

-Abdiel
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

2015-05-11 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a3190e79..afbd3c16 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1474,6 +1474,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
} else
ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1];
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) {
+   DRM_DEBUG("RS is only allowed for Haswell, Gen8 "
+ "and above\n");
+   return -EINVAL;
+   }
+   if (ring->id != RCS) {
+   DRM_DEBUG("RS is not available on %s)\n",
+ring->name);
+   return -EINVAL;
+   }
+
+   dispatch_flags |= I915_DISPATCH_RS;
+   }
+
if (!intel_ring_initialized(ring)) {
DRM_DEBUG("execbuf with invalid ring: %d\n",
  (int)(args->flags & I915_EXEC_RING_MASK));
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index c761fe0..3521bc0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -167,6 +167,7 @@ struct  intel_engine_cs {
   unsigned dispatch_flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_engine_cs *ring);
 
/* GEN8 signal/wait table - never trust comments!
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 551b673..a4c1a5c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD_RING1(1<<13)
 #define I915_EXEC_BSD_RING2(2<<13)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<16)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer

2015-05-11 Thread Abdiel Janulgue
This is a re-spin of my resource streamer patchset from a year ago.
The resource streamer is a hw-feature that helps in reducing commands
submitted by the CPU.

We have finally have the Mesa optimization that requires the use of
this interface.

Abdiel Janulgue (2):
  drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
  drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++
 drivers/gpu/drm/i915/i915_reg.h|  1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|  6 --
 drivers/gpu/drm/i915/intel_ringbuffer.h|  1 +
 include/uapi/drm/i915_drm.h|  7 ++-
 5 files changed, 26 insertions(+), 3 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

2015-05-11 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 6 --
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b522eb6..238bb25 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -356,6 +356,7 @@
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 
 #define MI_PREDICATE_SRC0  (0x2400)
 #define MI_PREDICATE_SRC1  (0x2408)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 441e250..9045144 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
@@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
intel_ring_emit(ring,
MI_BATCH_BUFFER_START |
(dispatch_flags & I915_DISPATCH_SECURE ?
-0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
+0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) |
+   (dispatch_flags & I915_DISPATCH_RS ? 
MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.9.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] drm/i915/bdw: Enable resource streamer on Broadwell

2014-06-11 Thread Abdiel Janulgue


On 11.06.2014 11:10, Jesse Barnes wrote:

On Tue,  6 May 2014 22:25:04 +0300
Abdiel Janulgue  wrote:


From: Abdiel Janulgue 

This is a re-spin of my resource streamer patchset from October
adapted to enable the feature on Broadwell instead.

The resource streamer is a hw-feature that helps in reducing commands
being submitted by the CPU. Haswell initially has this feature.
Unfortunately, HSW seems to have problems switching between non-RS
and RS-enabled commands[1]. On BDW however it works seamlessly
as expected.

The i-g-t test comes next on a separate patch-series.

--
[1] http://lists.freedesktop.org/archives/intel-gfx/2014-May/044577.html

Abdiel Janulgue (2):
   drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
   drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

  drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 
  drivers/gpu/drm/i915/i915_reg.h|1 +
  drivers/gpu/drm/i915/intel_ringbuffer.c|3 ++-
  drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
  include/uapi/drm/i915_drm.h|7 ++-
  5 files changed, 18 insertions(+), 2 deletions(-)

These seem trivial enough... have you seen cases where you'd like to
enable it in Mesa?  If so, it probably makes sense to merge this patch
so you can do your tuning and enabling on the Mesa side...


I've had some Mesa patches since last year. Unfortunately, the changes 
didn't
give any dramatic performance improvements. On the other hand, I admit: 
1. I've
only run it against GLBenchmark and Unigine benchmarks. Discussion with 
Portland

folks seem to suggest that we need to have a comprehensive run with more
benchmarks. 2. I only did the benchmarks in Haswell. Would be nice to see
how it does on Broadwell.

I agree though that further tuning of the performance needs to be done on
the Mesa side.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables

2014-05-07 Thread Abdiel Janulgue
On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote:
> I quickly cobbled together a hsw version of this and gave it a whirl on
> one machine. Seems to work just fine here, and no lockups when switching
> between hw and sw binding tables. Did you get the lockups on hsw even
> with rendercopy?
> 
> Here's my hsw version:
> 
> 
> +static void
> +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
> +{
> + if (!enable) {
> + OUT_BATCH(MI_RS_CONTROL | 0x0);
> +
> + OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2));
> + /* binding table pool base address */

This bit I missed and the source of my troubles for the past few months.

> + OUT_BATCH(3 << 5);

Yep, I confirm toggling on HSW does work quite well now. I'll now update the 
patches to include HSW path on the kernel. I also take back my previous 
statement that RS is broken on HSW! :)

Thanks a lot Ville!


> + /* Upper bound */
> + OUT_BATCH(0);
> +
> + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> + OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL |
> GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0);
> + OUT_BATCH(0);
> +
> + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2));
> + OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE);
> + OUT_BATCH(0);
> + OUT_BATCH(0);
> +
> + return;
> +}

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables

2014-05-06 Thread Abdiel Janulgue
On Wednesday, May 07, 2014 12:38:04 AM Ville Syrjälä wrote:
> On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote:
> > Use on-chip hw-binding table generator to generate binding
> > tables when the test emits SURFACE_STATES packets. The hw generates
> > these binding table packets internally so we don't have to
> > allocate space on the batchbuffer.
> > 
> > Signed-off-by: Abdiel Janulgue 
> > ---
> > 
> >  lib/gen8_render.h   |   13 +++
> >  lib/intel_batchbuffer.c |   12 ++
> >  lib/intel_batchbuffer.h |3 ++
> >  lib/intel_reg.h |3 ++
> >  lib/rendercopy_gen8.c   |   97
> >  --- 5 files changed, 123
> >  insertions(+), 5 deletions(-)
> > 
> > diff --git a/lib/gen8_render.h b/lib/gen8_render.h
> > index fffc100..4d5f7ba 100644
> > --- a/lib/gen8_render.h
> > +++ b/lib/gen8_render.h
> > @@ -83,6 +83,19 @@
> > 
> >  /* Random shifts */
> >  #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
> > 
> > +/* GEN8+ resource streamer */
> > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC   (0x7919 << 16) /*
> > GEN8 */ +# define BINDING_TABLE_POOL_ENABLE  0x0800
> > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS  (0x7843 << 16) /*
> > GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS  (0x7844 <<
> > 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS 
> > (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS
> >  (0x7846 << 16) /* GEN8 */ +#define
> > GEN8_3DSTATE_BINDING_TABLE_EDIT_PS  (0x7847 << 16) /* GEN8 */ +
> > +/* Toggling RS needs PIPE_CONTROL SC invalidate */
> > +#define _3DSTATE_PIPE_CONTROL  ((0x3 << 29) | (3 << 27) | (2 
> > << 24))
> > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2)
> > +
> > 
> >  /* Shamelessly ripped from mesa */
> >  struct gen8_surface_state
> >  {
> > 
> > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> > index e868922..26c9b89 100644
> > --- a/lib/intel_batchbuffer.c
> > +++ b/lib/intel_batchbuffer.c
> > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer
> > *batch)
> > 
> > memset(batch->buffer, 0, sizeof(batch->buffer));
> > 
> > batch->ptr = batch->buffer;
> > 
> > +
> > +   if (batch->hw_bt_pool_bo != NULL) {
> > +   drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +   batch->hw_bt_pool_bo = NULL;
> > +   }
> > +
> > +   batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
> > + 131072, 4096);
> > 
> >  }
> >  
> >  /**
> > 
> > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer
> > *batch)> 
> >  {
> >  
> > drm_intel_bo_unreference(batch->bo);
> > batch->bo = NULL;
> > 
> > +
> > +   drm_intel_bo_unreference(batch->hw_bt_pool_bo);
> > +   batch->hw_bt_pool_bo = NULL;
> > +
> > 
> > free(batch);
> >  
> >  }
> > 
> > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
> > index 49dbcf0..2a516bc 100644
> > --- a/lib/intel_batchbuffer.h
> > +++ b/lib/intel_batchbuffer.h
> > @@ -18,6 +18,9 @@ struct intel_batchbuffer {
> > 
> > uint8_t buffer[BATCH_SZ];
> > uint8_t *ptr;
> > uint8_t *state;
> > 
> > +
> > +   drm_intel_bo *hw_bt_pool_bo;
> > +   int use_resource_streamer;
> > 
> >  };
> >  
> >  struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr
> >  *bufmgr,
> > 
> > diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> > index 4b3a102..1cb9a29 100644
> > --- a/lib/intel_reg.h
> > +++ b/lib/intel_reg.h
> > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> > SOFTWARE.> 
> >  #define MI_BATCH_NON_SECURE(1)
> >  #define MI_BATCH_NON_SECURE_I965   (1 << 8)
> > 
> > +/* Resource Streamer */
> > +#define MI_RS_CONTROL   (0x6 << 23)
> > +
> > 
> >  #define MAX_DISPLAY_PIPES  2
> >  
> >  typedef enum {
> > 
> > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
> > index 6f5a698..b0dd0f3 100644
> > --- a/lib/rendercopy_gen8.c
> > +++ b/lib/rendercopy_gen8.c
> > @@ -169,12 +169,14 @@ static void
&

[Intel-gfx] [PATCH] igt tests/gem_exec_params: Test for -EINVAL for invalid platform and ring when executing RS

2014-05-06 Thread Abdiel Janulgue
Add test that makes sure RS bit only gets executed on BDW and
on the render ring.

Signed-off-by: Abdiel Janulgue 
---
 tests/gem_exec_params.c |   11 +++
 1 file changed, 11 insertions(+)

diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c
index 769969d..a3f765b 100644
--- a/tests/gem_exec_params.c
+++ b/tests/gem_exec_params.c
@@ -193,6 +193,17 @@ igt_main
execbuf.num_cliprects = 0;
}
 
+   igt_subtest("rs-not-gen8") {
+igt_require(intel_gen(devid) != 8);
+execbuf.flags = I915_EXEC_RENDER | I915_EXEC_RESOURCE_STREAMER;
+RUN_FAIL(EINVAL);
+}
+
+   igt_subtest("invalid-rs-ring") {
+   execbuf.flags = I915_EXEC_RESOURCE_STREAMER;
+   RUN_FAIL(EINVAL);
+   }
+
 #define DIRT(name) \
igt_subtest(#name "-dirt") { \
execbuf.flags = 0; \
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables

2014-05-06 Thread Abdiel Janulgue
Use on-chip hw-binding table generator to generate binding
tables when the test emits SURFACE_STATES packets. The hw generates
these binding table packets internally so we don't have to
allocate space on the batchbuffer.

Signed-off-by: Abdiel Janulgue 
---
 lib/gen8_render.h   |   13 +++
 lib/intel_batchbuffer.c |   12 ++
 lib/intel_batchbuffer.h |3 ++
 lib/intel_reg.h |3 ++
 lib/rendercopy_gen8.c   |   97 ---
 5 files changed, 123 insertions(+), 5 deletions(-)

diff --git a/lib/gen8_render.h b/lib/gen8_render.h
index fffc100..4d5f7ba 100644
--- a/lib/gen8_render.h
+++ b/lib/gen8_render.h
@@ -83,6 +83,19 @@
 /* Random shifts */
 #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23
 
+/* GEN8+ resource streamer */
+#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC   (0x7919 << 16) /* GEN8 */
+# define BINDING_TABLE_POOL_ENABLE  0x0800
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS  (0x7843 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS  (0x7844 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS  (0x7845 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS  (0x7846 << 16) /* GEN8 */
+#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS  (0x7847 << 16) /* GEN8 */
+
+/* Toggling RS needs PIPE_CONTROL SC invalidate */
+#define _3DSTATE_PIPE_CONTROL  ((0x3 << 29) | (3 << 27) | (2 << 24))
+#define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2)
+
 /* Shamelessly ripped from mesa */
 struct gen8_surface_state
 {
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index e868922..26c9b89 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
memset(batch->buffer, 0, sizeof(batch->buffer));
 
batch->ptr = batch->buffer;
+
+   if (batch->hw_bt_pool_bo != NULL) {
+   drm_intel_bo_unreference(batch->hw_bt_pool_bo);
+   batch->hw_bt_pool_bo = NULL;
+   }
+
+   batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt",
+ 131072, 4096);
 }
 
 /**
@@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
drm_intel_bo_unreference(batch->bo);
batch->bo = NULL;
+
+   drm_intel_bo_unreference(batch->hw_bt_pool_bo);
+   batch->hw_bt_pool_bo = NULL;
+
free(batch);
 }
 
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 49dbcf0..2a516bc 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -18,6 +18,9 @@ struct intel_batchbuffer {
uint8_t buffer[BATCH_SZ];
uint8_t *ptr;
uint8_t *state;
+
+   drm_intel_bo *hw_bt_pool_bo;
+   int use_resource_streamer;
 };
 
 struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index 4b3a102..1cb9a29 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define MI_BATCH_NON_SECURE(1)
 #define MI_BATCH_NON_SECURE_I965   (1 << 8)
 
+/* Resource Streamer */
+#define MI_RS_CONTROL   (0x6 << 23)
+
 #define MAX_DISPLAY_PIPES  2
 
 typedef enum {
diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c
index 6f5a698..b0dd0f3 100644
--- a/lib/rendercopy_gen8.c
+++ b/lib/rendercopy_gen8.c
@@ -169,12 +169,14 @@ static void
 gen6_render_flush(struct intel_batchbuffer *batch,
  drm_intel_context *context, uint32_t batch_end)
 {
-   int ret;
+   int ret = 0;
+   uint32_t flags = 0;
 
ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer);
+   flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0;
if (ret == 0)
ret = drm_intel_gem_bo_context_exec(batch->bo, context,
-   batch_end, 0);
+   batch_end, flags | 
I915_EXEC_RENDER);
assert(ret == 0);
 }
 
@@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct 
igt_buf *buf,
return offset;
 }
 
+static void
+gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable)
+{
+   if (!enable) {
+   OUT_BATCH(MI_RS_CONTROL | 0x0);
+
+   OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2));
+   /* binding table pool base address */
+   OUT_BATCH(0);
+   OUT_BATCH(0);
+   /* Upper bound */
+   OUT_BATCH(0);
+
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
+   OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+   OUT_BATCH(0);
+ 

[Intel-gfx] [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer

2014-05-06 Thread Abdiel Janulgue
Add option in basic test for the render_copy to test and toggle
hw-generated binding tables feature.

Signed-off-by: Abdiel Janulgue 
---
 tests/gem_render_copy.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tests/gem_render_copy.c b/tests/gem_render_copy.c
index 331b7ce..c9dfba0 100644
--- a/tests/gem_render_copy.c
+++ b/tests/gem_render_copy.c
@@ -131,14 +131,19 @@ int main(int argc, char **argv)
int opt;
int opt_dump_png = false;
int opt_dump_aub = igt_aub_dump_enabled();
+   int opt_use_rs = false;
 
igt_simple_init();
 
-   while ((opt = getopt(argc, argv, "d")) != -1) {
+   while ((opt = getopt(argc, argv, "d:r")) != -1) {
switch (opt) {
case 'd':
opt_dump_png = true;
break;
+   case 'r':
+   printf("Use resource streamer\n");
+   opt_use_rs = true;
+   break;
default:
break;
}
@@ -156,6 +161,7 @@ int main(int argc, char **argv)
  "no render-copy function\n");
 
batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+   batch->use_resource_streamer = opt_use_rs;
igt_assert(batch);
}
 
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] igt tests/rendercopy: Add option to test resource streamer functionality

2014-05-06 Thread Abdiel Janulgue
This patchseries tests a resource streamer feature called hw-generated binding 
tables,
which can be enabled from userspace. It is essentially a newer format that uses 
the hw to
generate and submit binding tables to the GPU. It uses the basic rendercopy 
test to toggle
the feature on and off and to verify if the resulting data is still correct.

Abdiel Janulgue (2):
  rendercopy/bdw: Enable hw-generated binding tables
  tests/gem_render_copy: Add option to test resource streamer

 lib/gen8_render.h   |   13 +++
 lib/intel_batchbuffer.c |   12 ++
 lib/intel_batchbuffer.h |3 ++
 lib/intel_reg.h |3 ++
 lib/rendercopy_gen8.c   |   97 ---
 tests/gem_render_copy.c |8 +++-
 6 files changed, 130 insertions(+), 6 deletions(-)



___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] drm/i915/bdw: Enable resource streamer on Broadwell

2014-05-06 Thread Abdiel Janulgue
From: Abdiel Janulgue 

This is a re-spin of my resource streamer patchset from October
adapted to enable the feature on Broadwell instead.

The resource streamer is a hw-feature that helps in reducing commands
being submitted by the CPU. Haswell initially has this feature.
Unfortunately, HSW seems to have problems switching between non-RS
and RS-enabled commands[1]. On BDW however it works seamlessly
as expected.

The i-g-t test comes next on a separate patch-series.

--
[1] http://lists.freedesktop.org/archives/intel-gfx/2014-May/044577.html 

Abdiel Janulgue (2):
  drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
  drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

 drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 
 drivers/gpu/drm/i915/i915_reg.h|1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c|3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h|1 +
 include/uapi/drm/i915_drm.h|7 ++-
 5 files changed, 18 insertions(+), 2 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag

2014-05-06 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 
 include/uapi/drm/i915_drm.h|7 ++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 47fe8ec..68aaa9b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1074,6 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
if (args->flags & I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
 
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if ((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_RENDER ||
+   !IS_BROADWELL(dev))
+   return -EINVAL;
+
+   flags |= I915_DISPATCH_RS;
+   }
+
if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) {
DRM_DEBUG("execbuf with unknown ring: %d\n",
  (int)(args->flags & I915_EXEC_RING_MASK));
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8a3e4ef00..adfc45d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -734,7 +734,12 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT   (1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START

2014-05-06 Thread Abdiel Janulgue
Adds support for executing the resource streamer on BDW

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |3 ++-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 0eff337..2d5d0ab 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -286,6 +286,7 @@
 #define   MI_BATCH_NON_SECURE_HSW  (1<<13)
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1)
 
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 40a7aa4..cc0110c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1810,7 +1810,8 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
return ret;
 
/* FIXME(BDW): Address space and security selectors. */
-   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
+   intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) |
+   (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER 
: 0));
intel_ring_emit(ring, lower_32_bits(offset));
intel_ring_emit(ring, upper_32_bits(offset));
intel_ring_emit(ring, MI_NOOP);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 72c3c15..cf9e259 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -116,6 +116,7 @@ struct  intel_ring_buffer {
   unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_ring_buffer *ring);
 
struct {
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-05-02 Thread Abdiel Janulgue
Hi all!

On Thursday, April 24, 2014 01:17:14 PM Ville Syrjälä wrote:
> On Thu, Apr 24, 2014 at 11:25:15AM +0300, Abdiel Janulgue wrote:
> > On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote:
> > > On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote:
> > > > Anyway I haven't tried the work-around where we explictly only disable
> > > > the
> > > > BT and RS on the other user-space clients (xorg driver in this case)
> > > > when
> > > > Mesa is using RS instead of forcing the reset of the clients to use RS
> > > > format.  I'll try that first and let you know if it works.
> > 
> > I hate to break the bad news. Tried this just now - still get hangs :(
> > 
> > So I guess, all userspace clients* does need to use RS-format if we use
> > this feature. GPGPU workloads seems to be special use-case where the RS
> > hwbinding table format can be disabled. Otherwise, I guess we are stuck
> > with this inflexibility.
> 
> The spec also says this:
> "When switching between HW and SW binding table generation,
> SW must issue a state cache invalidate."
> 
> So it does look like they were expecting people to switch between the
> two modes.
> 
> Do you have any igt test for RS? Maybe add an option into rendercopy to
> make use of RS? Then we could write some tests that try to hit this
> problem w/o requiring Mesa.

I've modified igt rendercopy to test switching RS format on and off.

Good news and bad news!

Good news is that switching between RS binding tables and SW format
works seamlessly in BDW! Bad news for HSW because the problems I describe 
above only happens on that chip.

This is a HW issue then.

=Abdiel


> 
> > [1]
> > On the other hand, it doesn't seem all that bad though. The RS hw-binding
> > table format are only needed for clients that submit vertex and pixel
> > shader commands. I've identified currently just UXA and SNA that seem to
> > use this besides Mesa. OpenCL is not affected.
> > 
> > > > If it does, it
> > > > might be more efficient to do that in the kernel?
> > > 
> > > It has to be done in the kernel in order for interoperability with third
> > > party clients.
> > > -Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-04-24 Thread Abdiel Janulgue
On Thursday, April 24, 2014 01:17:14 PM Ville Syrjälä wrote:
> On Thu, Apr 24, 2014 at 11:25:15AM +0300, Abdiel Janulgue wrote:
> > On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote:
> > > On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote:
> > > > Anyway I haven't tried the work-around where we explictly only disable
> > > > the
> > > > BT and RS on the other user-space clients (xorg driver in this case)
> > > > when
> > > > Mesa is using RS instead of forcing the reset of the clients to use RS
> > > > format.  I'll try that first and let you know if it works.
> > 
> > I hate to break the bad news. Tried this just now - still get hangs :(
> > 
> > So I guess, all userspace clients* does need to use RS-format if we use
> > this feature. GPGPU workloads seems to be special use-case where the RS
> > hwbinding table format can be disabled. Otherwise, I guess we are stuck
> > with this inflexibility.
> 
> The spec also says this:
> "When switching between HW and SW binding table generation,
> SW must issue a state cache invalidate."
> 
> So it does look like they were expecting people to switch between the
> two modes.

Yep, the previous work-around did include a state cache invalidate.

> 
> Do you have any igt test for RS? Maybe add an option into rendercopy to
> make use of RS? Then we could write some tests that try to hit this
> problem w/o requiring Mesa.

Seems to be a good idea. I'll try to reproduce the problem with igt


> 
> > [1]
> > On the other hand, it doesn't seem all that bad though. The RS hw-binding
> > table format are only needed for clients that submit vertex and pixel
> > shader commands. I've identified currently just UXA and SNA that seem to
> > use this besides Mesa. OpenCL is not affected.
> > 
> > > > If it does, it
> > > > might be more efficient to do that in the kernel?
> > > 
> > > It has to be done in the kernel in order for interoperability with third
> > > party clients.
> > > -Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-04-24 Thread Abdiel Janulgue
On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote:
> On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote:
> > Anyway I haven't tried the work-around where we explictly only disable the
> > BT and RS on the other user-space clients (xorg driver in this case) when
> > Mesa is using RS instead of forcing the reset of the clients to use RS
> > format.  I'll try that first and let you know if it works. 

I hate to break the bad news. Tried this just now - still get hangs :(

So I guess, all userspace clients* does need to use RS-format if we use this 
feature. GPGPU workloads seems to be special use-case where the RS hwbinding 
table format can be disabled. Otherwise, I guess we are stuck with this 
inflexibility.

[1]
On the other hand, it doesn't seem all that bad though. The RS hw-binding 
table format are only needed for clients that submit vertex and pixel shader 
commands. I've identified currently just UXA and SNA that seem to use this 
besides Mesa. OpenCL is not affected.


> > If it does, it
> > might be more efficient to do that in the kernel?
> 
> It has to be done in the kernel in order for interoperability with third
> party clients.
> -Chris
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-04-23 Thread Abdiel Janulgue
On Wednesday, April 23, 2014 08:50:21 PM Ville Syrjälä wrote:
> On Wed, Apr 23, 2014 at 06:52:09PM +0200, Daniel Vetter wrote:
> > On Wed, Apr 23, 2014 at 1:21 PM, Abdiel Janulgue
> > 
> >  wrote:
> > > I've already tried disabling RS at the end of every batch so that next
> > > batch in different context can continue to use older non-RS format.
> > > That does not work either and still causes hangs.
> > > 
> > > What I've seen so far, it seems GPU does not like switching the format
> > > of
> > > commands from RS-format to non-RS format. It's either one way or the
> > > other. If switched on, it affects subsequent contexes henceforth
> > > expecting RS-format commands until the GPU gets reset. That's probably
> > > the note in bspec:
> > > 
> > > "the binding table generator feature has a simple all or nothing model".
> > 
> > Oh hooray, that's just awesome :( So we indeed need to stop the gpu
> > and reset it if there's a non-RS render batch after any RS render
> > batch.
> 
> I'm not sure I buy it. There's an example in the spec showing how to
> disable the RS around eg. GPGPU workloads and re-enable it for 3D
> workloads even within one batch. I guess it's possible that the GPGPU
> pipeline mode is somehow special here, but since the RS state is
> supposed to be saved to the context I'm thinking you should be able to
> disable it whenever you want.
> 
> What's really confusing with that example is the fact that it first
> re-enables the binding tables and then the RS, but then there's a note
> after that saying you have to those steps in the opposite order.
> 
> Anyhoo, I'm wondering if the problems are simply due to disabling RS but
> leaving the binding tables enabled?

Yes, the work-around I tried previously is that at the end of each batch both 
hw-binding tables and RS are disabled in that particular order.

> 
> So one idea might be that when we switch from executing an RS batch to a
> non-RS batch, we should disable the RS and binding tables manually. We
> would then have to demand that any user batch which needs the binding
> tables enabled must enable them, even if if we just executed a batch that
> already used the binding tables. And when we need to disable the RS and
> binding tables we could either have the kernel do that autmagically when
> it notices a RS->non-RS transition, or we could also demand that all user
> batches must disable the binding tables at the end. But maybe demanding
> that from every batch would incur some extra overhead? In any case it
> should be fairly easy to try that and see if it cures the hangs. Or are
> you already doing things that way?

This is actually what current RS implementation in Mesa does. It explicitly 
enables RS then hw-binding tables for each batch start and then the tear-down 
work-around at the batch end I mentioned  above; which didn't help at all.

Anyway I haven't tried the work-around where we explictly only disable the BT 
and RS on the other user-space clients (xorg driver in this case) when Mesa is 
using RS instead of forcing the reset of the clients to use RS format.  I'll 
try that first and let you know if it works. If it does, it might be more 
efficient to do that in the kernel?

--Abdiel

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-04-23 Thread Abdiel Janulgue
On Tuesday, April 22, 2014 07:20:58 PM Daniel Vetter wrote:
> On Tue, Apr 22, 2014 at 04:23:12PM +0100, Chris Wilson wrote:
> > On Tue, Apr 22, 2014 at 06:16:34PM +0300, Abdiel Janulgue wrote:
> > > This patch series enables resource streamer for xf86-video-intel UXA.
> > > 
> > > Fixes i965 Mesa driver that makes use of resource streamer optimization
> > > to survive a full piglit run without bricking the machine. Previously,
> > > I get random hungs when doing a full piglit round when enabling RS.
> > > After months of head-scratching, I noticed I didn't quite pay attention
> > > 
> > > to this small detail in bspec:
> > >  "The binding table generator feature has a simple all or nothing
> > >  
> > >   model. If HW generated binding tables are enabled, the driver must
> > >   enable
> > >   the pool and use 3D_HW_BINDING_TABLE_POINTER_* commands."
> > > 
> > > I realized that our xf86-video-intel driver is piping out 3D commands
> > > as well that used the older manual-generated binding table format
> > > that caused a conflict when Mesa is using the hw-generated binding table
> > > format.
> > 
> > This has to be per-context right? Otherwise how do you intend to
> > coordinate multiple clients submitting to the kernel using incompatible
> > command sets? Even in the restricted X/DRI sense, how do you intend to
> > negotiate which to use?
> 
> Hm, I've thought that the MI_BB_START should synchronize with the
> asynchronous RS parsing/processing? Is there no way to disable RS again
> for the next batch in a different context?

I've already tried disabling RS at the end of every batch so that next batch 
in different context can continue to use older non-RS format. That does not 
work either and still causes hangs. 

What I've seen so far, it seems GPU does not like switching the format of 
commands from RS-format to non-RS format. It's either one way or the other. If 
switched on, it affects subsequent contexes henceforth expecting RS-format 
commands until the GPU gets reset. That's probably the note in bspec:
"the binding table generator feature has a simple all or nothing model".

> 
> If there's no way to solve this with contexts or some manual reset trick
> using LRIs then we're pretty decently screwed up. Worst case we need to
> stop the gpu and reset it to keep backwards compat :(







___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables

2014-04-22 Thread Abdiel Janulgue
This patch series enables resource streamer for xf86-video-intel UXA.

Fixes i965 Mesa driver that makes use of resource streamer optimization
to survive a full piglit run without bricking the machine. Previously,
I get random hungs when doing a full piglit round when enabling RS. 
After months of head-scratching, I noticed I didn't quite pay attention
to this small detail in bspec:

 "The binding table generator feature has a simple all or nothing
  model. If HW generated binding tables are enabled, the driver must enable
  the pool and use 3D_HW_BINDING_TABLE_POINTER_* commands."

I realized that our xf86-video-intel driver is piping out 3D commands
as well that used the older manual-generated binding table format
that caused a conflict when Mesa is using the hw-generated binding table 
format.

I didn't touch the SNA and video acceleration paths yet. Please let me know
if I'm on the right track.

To use the feature, set in xorg.conf:

Option "ResourceStreamer" "true"

 src/intel_options.c |1 +
 src/intel_options.h |1 +
 src/uxa/i965_3d.c   |5 ++-
 src/uxa/i965_reg.h  |8 +
 src/uxa/i965_render.c   |   78 +++
 src/uxa/intel.h |3 ++
 src/uxa/intel_batchbuffer.c |7 ++--
 src/uxa/intel_driver.c  |   10 +-
 8 files changed, 94 insertions(+), 19 deletions(-)

Abdiel Janulgue (2):

[PATCH 1/2] xf86-video-intel: Add "ResourceStreamer" option
[PATCH 2/2] xf86-video-intel: Enable hw-generated binding tables for
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC PATCH 1/2] xf86-video-intel: Add "ResourceStreamer" option

2014-04-22 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 src/intel_options.c|1 +
 src/intel_options.h|1 +
 src/uxa/intel.h|3 +++
 src/uxa/intel_driver.c |   10 +-
 4 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/intel_options.c b/src/intel_options.c
index 02a4ae1..cc418d1 100644
--- a/src/intel_options.c
+++ b/src/intel_options.c
@@ -35,6 +35,7 @@ const OptionInfoRec intel_options[] = {
{OPTION_DEBUG_FLUSH_CACHES, "DebugFlushCaches", OPTV_BOOLEAN, {0}, 0},
{OPTION_DEBUG_WAIT, "DebugWait", OPTV_BOOLEAN, {0}, 0},
{OPTION_BUFFER_CACHE,   "BufferCache",  OPTV_BOOLEAN,   {0},1},
+   {OPTION_RESOURCESTREAMER, "ResourceStreamer",   OPTV_BOOLEAN,   {0},
0},
 #endif
{-1,NULL,   OPTV_NONE,  {0},0}
 };
diff --git a/src/intel_options.h b/src/intel_options.h
index 77f0c45..4867419 100644
--- a/src/intel_options.h
+++ b/src/intel_options.h
@@ -43,6 +43,7 @@ enum intel_options {
OPTION_DEBUG_FLUSH_CACHES,
OPTION_DEBUG_WAIT,
OPTION_BUFFER_CACHE,
+   OPTION_RESOURCESTREAMER,
 #endif
NUM_OPTIONS,
 };
diff --git a/src/uxa/intel.h b/src/uxa/intel.h
index 6ac770e..611eeea 100644
--- a/src/uxa/intel.h
+++ b/src/uxa/intel.h
@@ -349,6 +349,9 @@ typedef struct intel_screen_private {
 */
Bool fallback_debug;
unsigned debug_flush;
+
+   dri_bo *hw_bt_pool_bo;
+   Bool use_resource_streamer;
 #if HAVE_UDEV
struct udev_monitor *uevent_monitor;
pointer uevent_handler;
diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c
index 0a4fe2a..5574ffe 100644
--- a/src/uxa/intel_driver.c
+++ b/src/uxa/intel_driver.c
@@ -196,6 +196,15 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn)
intel->fallback_debug = xf86ReturnOptValBool(intel->Options,
 OPTION_FALLBACKDEBUG,
 FALSE);
+   intel->use_resource_streamer = xf86ReturnOptValBool(intel->Options,
+OPTION_RESOURCESTREAMER,
+FALSE);
+   if (intel->use_resource_streamer)
+   xf86DrvMsg(scrn->scrnIndex, X_CONFIG,
+  "Enabling Resource Streamer\n");
+   else
+   xf86DrvMsg(scrn->scrnIndex, X_CONFIG,
+  "Disabling Resource Streamer\n");
 
intel->debug_flush = 0;
 
@@ -870,7 +879,6 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL)
 * appropriate.
 */
intel->XvEnabled = TRUE;
-
 #ifdef DRI2
if (intel->directRenderingType == DRI_NONE
&& I830DRI2ScreenInit(screen))
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC PATCH 2/2] xf86-video-intel: Enable hw-generated binding tables for UXA

2014-04-22 Thread Abdiel Janulgue
Code is based on my hw-generated binding table code for Mesa
adapted to i965_composite path in UXA.

Signed-off-by: Abdiel Janulgue 
---
 src/uxa/i965_3d.c   |5 ++-
 src/uxa/i965_reg.h  |8 +
 src/uxa/i965_render.c   |   78 +++
 src/uxa/intel_batchbuffer.c |7 ++--
 4 files changed, 80 insertions(+), 18 deletions(-)

diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c
index 757a979..afbb5a7 100644
--- a/src/uxa/i965_3d.c
+++ b/src/uxa/i965_3d.c
@@ -406,7 +406,10 @@ gen7_upload_binding_table(intel_screen_private *intel,
  uint32_t ps_binding_table_offset)
 {
OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
-   OUT_BATCH(ps_binding_table_offset);
+   if (intel->use_resource_streamer)
+   OUT_BATCH(ps_binding_table_offset >> 1);
+   else
+   OUT_BATCH(ps_binding_table_offset);
 }
 
 void
diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h
index a934a67..157b212 100644
--- a/src/uxa/i965_reg.h
+++ b/src/uxa/i965_reg.h
@@ -296,6 +296,14 @@
 /* DW1 */
 # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16
 
+/* GEN7+ resource streamer */
+#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC   BRW_3D(3, 1, 0x19)
+# define BINDING_TABLE_POOL_ENABLE  0x0860
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS  BRW_3D(3, 0, 0x43)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS  BRW_3D(3, 0, 0x44)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS  BRW_3D(3, 0, 0x45)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_DS  BRW_3D(3, 0, 0x46)
+#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS  BRW_3D(3, 0, 0x47)
 
 #define PIPELINE_SELECT_3D 0
 #define PIPELINE_SELECT_MEDIA  1
diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c
index 74f57af..d5225dd 100644
--- a/src/uxa/i965_render.c
+++ b/src/uxa/i965_render.c
@@ -1783,6 +1783,10 @@ static void i965_surface_flush(struct 
intel_screen_private *intel)
   sizeof(intel->surface_data), 4096);
assert(intel->surface_bo);
 
+   drm_intel_bo_unreference(intel->hw_bt_pool_bo);
+   intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt",
+ 131072, 4096);
+   assert(intel->hw_bt_pool_bo);
return;
(void)ret;
 }
@@ -2217,32 +2221,70 @@ static void i965_select_vertex_buffer(struct 
intel_screen_private *intel)
 static void i965_bind_surfaces(struct intel_screen_private *intel)
 {
uint32_t *binding_table;
+   uint32_t surf0 = 0, surf1 = 0, surf2 = 0;
 
assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= 
sizeof(intel->surface_data));
 
-   binding_table = (uint32_t*) (intel->surface_data + intel->surface_used);
-   intel->surface_table = intel->surface_used;
-   intel->surface_used += SURFACE_STATE_PADDED_SIZE;
-
-   binding_table[0] =
-   i965_set_picture_surface_state(intel,
+   surf0 = i965_set_picture_surface_state(intel,
   intel->render_dest_picture,
   intel->render_dest,
   TRUE);
-   binding_table[1] =
-   i965_set_picture_surface_state(intel,
+   surf1 = i965_set_picture_surface_state(intel,
   intel->render_source_picture,
   intel->render_source,
   FALSE);
if (intel->render_mask) {
-   binding_table[2] =
-   i965_set_picture_surface_state(intel,
-  
intel->render_mask_picture,
-  intel->render_mask,
-  FALSE);
+   surf2  = i965_set_picture_surface_state(intel,
+   
intel->render_mask_picture,
+   intel->render_mask,
+   FALSE);
+   }
+
+   if (intel->use_resource_streamer) {
+   intel->surface_table += (256 * sizeof(uint16_t));
+   OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2));
+   OUT_BATCH(0x3);
+   {
+   OUT_BATCH(0 << 16 | surf0 >> 5);
+   OUT_BATCH(1 << 16 | surf1 >> 5);
+   OUT_BATCH(2 << 16 | surf2 >> 5);
+   }
+   } else {
+   binding_table = (uint32_t*) (intel->surface_data + 
intel->surface_used);
+   intel->surfac

[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START (v3)

2013-10-10 Thread Abdiel Janulgue
v3: Use the I915_DISPATCH_RS flag to determine if batchbuffer needs
resource streamer bit.

Cc: Chris Wilson 
Cc: Daniel Vetter 
Cc: Ben Widawsky 
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h | 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c246727..f3c9103 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -246,6 +246,7 @@
 #define   MI_BATCH_NON_SECURE_HSW  (1<<13)
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 #define MI_SEMAPHORE_MBOX  MI_INSTR(0x16, 1) /* gen6+ */
 #define  MI_SEMAPHORE_GLOBAL_GTT(1<<22)
 #define  MI_SEMAPHORE_UPDATE   (1<<21)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b67104a..f9564b1 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1654,7 +1654,8 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
 
intel_ring_emit(ring,
MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
-   (flags & I915_DISPATCH_SECURE ? 0 : 
MI_BATCH_NON_SECURE_HSW));
+   (flags & I915_DISPATCH_SECURE ? 0 : 
MI_BATCH_NON_SECURE_HSW) |
+   (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER 
: 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.8.2.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag (v3)

2013-10-10 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer.

v3: - Make sure batch is only submitted on render ring and Haswell (Daniel)
- Separate EXEC and DISPATCH flags (Chris)
- Update __I915_EXEC_UNKNOWN_FLAGS (Kenneth)

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +++
 drivers/gpu/drm/i915/intel_ringbuffer.h| 1 +
 include/uapi/drm/i915_drm.h| 7 ++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..833677d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -957,6 +957,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
if (args->flags & I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
+   if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
+   if ((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_RENDER ||
+   !IS_HASWELL(dev))
+   return -EINVAL;
+
+   flags |= I915_DISPATCH_RS;
+   }
 
switch (args->flags & I915_EXEC_RING_MASK) {
case I915_EXEC_DEFAULT:
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 71a73f4..bd74427 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -110,6 +110,7 @@ struct  intel_ring_buffer {
   unsigned flags);
 #define I915_DISPATCH_SECURE 0x1
 #define I915_DISPATCH_PINNED 0x2
+#define I915_DISPATCH_RS 0x4
void(*cleanup)(struct intel_ring_buffer *ring);
int (*sync_to)(struct intel_ring_buffer *ring,
   struct intel_ring_buffer *to,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..edd6e31 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,7 +731,12 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT   (1<<12)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<13)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
1.8.2.1

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START

2013-10-08 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |7 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index c246727..f3c9103 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -246,6 +246,7 @@
 #define   MI_BATCH_NON_SECURE_HSW  (1<<13)
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 #define MI_SEMAPHORE_MBOX  MI_INSTR(0x16, 1) /* gen6+ */
 #define  MI_SEMAPHORE_GLOBAL_GTT(1<<22)
 #define  MI_SEMAPHORE_UPDATE   (1<<21)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index b67104a..c5dd71b 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1647,14 +1647,15 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
  unsigned flags)
 {
int ret;
+   int ring_emit_flags = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
+   (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW);
 
ret = intel_ring_begin(ring, 2);
if (ret)
return ret;
 
-   intel_ring_emit(ring,
-   MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
-   (flags & I915_DISPATCH_SECURE ? 0 : 
MI_BATCH_NON_SECURE_HSW));
+   intel_ring_emit(ring, ring_emit_flags | (flags & 
I915_EXEC_RESOURCE_STREAMER ?
+MI_BATCH_RESOURCE_STREAMER : 0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag

2013-10-08 Thread Abdiel Janulgue
Ensures that the batch buffer is executed by the resource streamer.

Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 ++
 include/uapi/drm/i915_drm.h|5 +
 2 files changed, 7 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 0ce0d47..4a56c58 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -962,6 +962,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
case I915_EXEC_DEFAULT:
case I915_EXEC_RENDER:
ring = &dev_priv->ring[RCS];
+   flags |= (args->flags & I915_EXEC_RESOURCE_STREAMER) ?
+ I915_EXEC_RESOURCE_STREAMER : 0;
break;
case I915_EXEC_BSD:
ring = &dev_priv->ring[VCS];
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 3a4e97b..5a4bd16 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -731,6 +731,11 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_HANDLE_LUT   (1<<12)
 
+/** Tell the kernel that the batchbuffer is processed by
+ *  the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<13)
+
 #define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1)
 
 #define I915_EXEC_CONTEXT_ID_MASK  (0x)
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] drm/i915/hsw: Enable resource streamer (v2)

2013-10-08 Thread Abdiel Janulgue
v2 of drm-i915 part of resource streamer enabling. Re-submitted
finally now that the Mesa portions are starting to take shape.
I also addressed some of the comments from Daniel and Chris from
the previous implementation.

Cc: Daniel Vetter 
Cc: Chris Wilson 

Abdiel Janulgue (2):
  drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag
  drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START
--

 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
 drivers/gpu/drm/i915/i915_reg.h| 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c| 7 ---
 include/uapi/drm/i915_drm.h| 5 +
 4 files changed, 12 insertions(+), 3 deletions(-)


___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 2/2] drm/i915: Prevent loading of uninitialized context garbage

2013-08-22 Thread Abdiel Janulgue
On Wednesday, August 21, 2013 11:12:36 PM Daniel Vetter wrote:
> On Wed, Aug 21, 2013 at 06:31:07PM +0300, Ville Syrjälä wrote:
> > On Wed, Aug 21, 2013 at 04:43:33PM +0300, Ville Syrjälä wrote:
> > > On Thu, Aug 08, 2013 at 08:00:26PM +0100, Chris Wilson wrote:
> > > > The extended state bits are stored in the LCA register and affect all
> > > > updates to the LCA register - i.e. the state on the old context is
> > > > saved
> > > > when SAVE_EX_STATE_EN  is currently set in the old context address
> > > > before
> > > > the update, and the new context is restored when RESTORE_EX_STATE_EN
> > > > is
> > > > set in the new context address. This is irrespective of the
> > > > RESTORE_INHIBIT flag in the MI_SET_CONTEXT.
> > > > 
> > > > Hence, upon initial loading the contents of the extended state is read
> > > > from uninitialised data. To workaround this, on first load we do a
> > > > dummy
> > > > load without the mandatory RESTORE_EX_STATE_EN bit so that the real
> > > > load
> > > > causes us to initialise the extended state of the context before it is
> > > > then loaded by the LCA update.
> > > > 
> > > > v2: Split out the introduction of the variable length MI_SET_CONTEXT
> > > > command sequence.
> > > > 
> > > > References: https://bugs.freedesktop.org/show_bug.cgi?id=64073
> > > > Signed-off-by: Chris Wilson 
> > > > Cc: Ben Widawsky 
> > > > ---
> > > > 
> > > >  drivers/gpu/drm/i915/i915_gem_context.c | 18 ++
> > > >  1 file changed, 18 insertions(+)
> > > > 
> > > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c
> > > > b/drivers/gpu/drm/i915/i915_gem_context.c index 8a7b61e..a57d49a
> > > > 100644
> > > > --- a/drivers/gpu/drm/i915/i915_gem_context.c
> > > > +++ b/drivers/gpu/drm/i915/i915_gem_context.c
> > > > @@ -367,6 +367,8 @@ mi_set_context(struct intel_ring_buffer *ring,
> > > > 
> > > > case 5: len += 2;
> > > > 
> > > > break;
> > > > 
> > > > }
> > > > 
> > > > +   if (!new_context->is_initialized)
> > > > +   len += 2;
> > > > 
> > > > ret = intel_ring_begin(ring, len);
> > > > if (ret)
> > > > 
> > > > @@ -382,6 +384,22 @@ mi_set_context(struct intel_ring_buffer *ring,
> > > > 
> > > > break;
> > > > 
> > > > }
> > > > 
> > > > +   if (!new_context->is_initialized) {
> > > > +   /* The GPU tries to restore the extended state 
> > > > irrespective
> > > > +* of RestoreInhibit (since it is part of the LCA switch
> > > > +* itself rather than the MI_SET_CONTEXT command).
> > > > +* Since the initial contents may be garbage we do a 
> > > > dummy
> > > > +* load first then set the mandatory flag for any future
> > > > +* ring context switches.
> > > > +*/
> > > > +   intel_ring_emit(ring, MI_SET_CONTEXT);
> > > > +   intel_ring_emit(ring,
> > > > +   
> > > > i915_gem_obj_ggtt_offset(new_context->obj) |
> > > > +   MI_MM_SPACE_GTT |
> > > > +   MI_SAVE_EXT_STATE_EN |
> > > > +   hw_flags);
> > > > +   }
> > > 
> > > Hmm. Couldn't we just do this w/ one MI_SET_CONTEXT? Just drop the
> > > MI_RESTORE_EXT_STATE_EN flag if the context is not initialized. The
> > > MI_SAVE_EXT_STATE_EN will be saved in the CCID, so when we switch to
> > > another context the extended state will be saved. And for the next
> > > switch to this context we will set the MI_RESTORE_EXT_STATE_EN bit
> > > in MI_SET_CONTEXT so it should get restored.
> > > 
> > > But I must admit BSpec is a bit confusing on the topic. It says the
> > > restore bit affects the switch to the context specified in the
> > > logical context address. I take that to mean that the effect of the
> > > restore bit is immediate. But BSpec also says that the bit is stored in
> > > CCID to control the subsequent switch to the same context. So does that
> > > actually mean that 'effective.restore_ext = CCID.restore_ext |
> > > MI_SET_CONTEXT.restore_ext'?
> > > 
> > > Oh, but BSpec also says that both bits must be set when RS2 power state
> > > is enabled. I think that's the same as RC6, or is it? So I guess the
> > > hardware might consult these bits when entering/leaving RC6. So I
> > > suppose
> > > we really need to make sure both bits are always set in case we hit RC6.
> > > So based on that reasoning the patch would seem correct.
> > > 
> > > I guess I'll give it an r-b regardless :)
> > > 
> > > Reviewed-by: Ville Syrjälä 
> > 
> > I just noticed that on HSW these bits control the resource streamer
> > state save/restore. The spec says we should always set the RS
> > restore bit if we set the RS save bit. So maybe we need some
> > !IS_HASWELL checks in there...
> 
> Looks like we're lucky since we don't have RS support yet ;-) Can you
> please poke Abdiel abou

[Intel-gfx] [RFC libdrm PATCH] intel: Add support for resource streamer

2013-07-08 Thread Abdiel Janulgue
Expose defines for resource streamer controls.
Based on the work of: Lukasz Anaczkowski 

Signed-off-by: Abdiel Janulgue 
---
 include/drm/i915_drm.h |1 +
 1 file changed, 1 insertion(+)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index aa983f3..8ddda40 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -651,6 +651,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD(2<<0)
 #define I915_EXEC_BLT(3<<0)
 #define I915_EXEC_VEBOX  (4<<0)
+#define I915_EXEC_RS (8<<0)
 
 /* Used for switching the constants addressing mode on gen4+ RENDER ring.
  * Gen6+ only supports relative addressing to dynamic state (default) and
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START

2013-07-08 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_reg.h |1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c |6 --
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index e9c50fa..3db1184 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -254,6 +254,7 @@
 #define   MI_BATCH_NON_SECURE_HSW  (1<<13)
 #define MI_BATCH_BUFFER_START  MI_INSTR(0x31, 0)
 #define   MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */
+#define   MI_BATCH_RESOURCE_STREAMER (1<<10)
 #define MI_SEMAPHORE_MBOX  MI_INSTR(0x16, 1) /* gen6+ */
 #define  MI_SEMAPHORE_GLOBAL_GTT(1<<22)
 #define  MI_SEMAPHORE_UPDATE   (1<<21)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index bc4c11b..9940fb3 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1678,14 +1678,16 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer 
*ring,
  unsigned flags)
 {
int ret;
+   int ring_emit_flags = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
+   (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW);
 
ret = intel_ring_begin(ring, 2);
if (ret)
return ret;
 
intel_ring_emit(ring,
-   MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW |
-   (flags & I915_DISPATCH_SECURE ? 0 : 
MI_BATCH_NON_SECURE_HSW));
+   ring_emit_flags |
+   (flags & I915_EXEC_RS ? MI_BATCH_RESOURCE_STREAMER : 
0));
/* bit0-7 is the length on GEN6+ */
intel_ring_emit(ring, offset);
intel_ring_advance(ring);
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Expose resource streamer control flags

2013-07-08 Thread Abdiel Janulgue
Signed-off-by: Abdiel Janulgue 
---
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 ++
 include/uapi/drm/i915_drm.h|1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 5aeb447..3d6d3c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -865,6 +865,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
if (args->flags & I915_EXEC_IS_PINNED)
flags |= I915_DISPATCH_PINNED;
+   if (args->flags & I915_EXEC_RS)
+   flags |= I915_EXEC_RS;
 
switch (args->flags & I915_EXEC_RING_MASK) {
case I915_EXEC_DEFAULT:
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 923ed7f..728f544 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -661,6 +661,7 @@ struct drm_i915_gem_execbuffer2 {
 #define I915_EXEC_BSD(2<<0)
 #define I915_EXEC_BLT(3<<0)
 #define I915_EXEC_VEBOX  (4<<0)
+#define I915_EXEC_RS (8<<0)
 
 /* Used for switching the constants addressing mode on gen4+ RENDER ring.
  * Gen6+ only supports relative addressing to dynamic state (default) and
-- 
1.7.9.5

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] RFC: drm/1915: Resource streamer initial support

2013-07-08 Thread Abdiel Janulgue
Daniel Vetter suggested at some point we need to implement getparam ioctl 
so userspace can figure out whether kernel supports RS at runtime.
For now, this will do to support the corresponding RFC mesa patches.

Based on the work of: Lukasz Anaczkowski 

Abdiel Janulgue (2):
  drm/i915/hsw: Expose resource streamer control flags
  drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START

[abj@hasbox drm-intel]$ git diff bc32705a7686370b2e50d467b001d88c2c059e39 --stat
 drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++
 drivers/gpu/drm/i915/i915_reg.h| 1 +
 drivers/gpu/drm/i915/intel_ringbuffer.c| 6 --
 include/uapi/drm/i915_drm.h| 1 +
 4 files changed, 8 insertions(+), 2 deletions(-)

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


<    1   2   3