[Intel-gfx] [PATCH v3 2/3] igt/gem_wait: Use new igt_spin_batch
Cc: Chris Wilson Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/gem_wait.c | 125 --- 1 file changed, 7 insertions(+), 118 deletions(-) diff --git a/tests/gem_wait.c b/tests/gem_wait.c index b4127de..785bb14 100644 --- a/tests/gem_wait.c +++ b/tests/gem_wait.c @@ -27,18 +27,6 @@ #include "igt.h" -#include -#include -#include - -#define gettid() syscall(__NR_gettid) -#define sigev_notify_thread_id _sigev_un._tid - -#define LOCAL_I915_EXEC_BSD_SHIFT (13) -#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) - -#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) - static int __gem_wait(int fd, struct drm_i915_gem_wait *w) { int err; @@ -75,14 +63,6 @@ static void invalid_buf(int fd) igt_assert_eq(__gem_wait(fd, &wait), -ENOENT); } -static uint32_t *batch; - -static void sigiter(int sig, siginfo_t *info, void *arg) -{ - *batch = MI_BATCH_BUFFER_END; - __sync_synchronize(); -} - #define MSEC_PER_SEC (1000) #define USEC_PER_SEC (1000 * MSEC_PER_SEC) #define NSEC_PER_SEC (1000 * USEC_PER_SEC) @@ -91,113 +71,26 @@ static void sigiter(int sig, siginfo_t *info, void *arg) #define HANG 2 static void basic(int fd, unsigned engine, unsigned flags) { - const int gen = intel_gen(intel_get_drm_devid(fd)); - struct drm_i915_gem_exec_object2 obj; - struct drm_i915_gem_relocation_entry reloc; - struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_wait wait; - unsigned engines[16]; - unsigned nengine; - int i, timeout; - - nengine = 0; - if (engine == -1) { - for_each_engine(fd, engine) - if (engine) engines[nengine++] = engine; - } else { - igt_require(gem_has_ring(fd, engine)); - engines[nengine++] = engine; - } - igt_require(nengine); - - memset(&execbuf, 0, sizeof(execbuf)); - execbuf.buffers_ptr = (uintptr_t)&obj; - execbuf.buffer_count = 1; - - memset(&obj, 0, sizeof(obj)); - obj.handle = gem_create(fd, 4096); - - obj.relocs_ptr = (uintptr_t)&reloc; - obj.relocation_count = 1; - memset(&reloc, 0, sizeof(reloc)); - - batch = gem_mmap__gtt(fd, obj.handle, 4096, PROT_WRITE); - gem_set_domain(fd, obj.handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - - reloc.target_handle = obj.handle; /* recurse */ - reloc.presumed_offset = 0; - reloc.offset = sizeof(uint32_t); - reloc.delta = 0; - reloc.read_domains = I915_GEM_DOMAIN_COMMAND; - reloc.write_domain = 0; - - i = 0; - batch[i] = MI_BATCH_BUFFER_START; - if (gen >= 8) { - batch[i] |= 1 << 8 | 1; - batch[++i] = 0; - batch[++i] = 0; - } else if (gen >= 6) { - batch[i] |= 1 << 8; - batch[++i] = 0; - } else { - batch[i] |= 2 << 6; - batch[++i] = 0; - if (gen < 4) { - batch[i] |= 1; - reloc.delta = 1; - } - } - - for (i = 0; i < nengine; i++) { - execbuf.flags &= ~ENGINE_MASK; - execbuf.flags |= engines[i]; - gem_execbuf(fd, &execbuf); - } + int wait_s = (flags == 0) ? NSEC_PER_SEC : 0; + wait_s = ((flags & HANG) == 0) ? wait_s : -1; + igt_spin_t spin = igt_spin_batch(fd, wait_s, engine, 0); + int timeout; memset(&wait, 0, sizeof(wait)); - wait.bo_handle = obj.handle; - igt_assert_eq(__gem_wait(fd, &wait), -ETIME); + wait.bo_handle = spin.handle; if (flags & BUSY) { struct timespec tv; timeout = 120; - if ((flags & HANG) == 0) { - *batch = MI_BATCH_BUFFER_END; - __sync_synchronize(); + if ((flags & HANG) == 0) timeout = 1; - } memset(&tv, 0, sizeof(tv)); while (__gem_wait(fd, &wait) == -ETIME) igt_assert(igt_seconds_elapsed(&tv) < timeout); } else { - timer_t timer; - - if ((flags & HANG) == 0) { - struct sigevent sev; - struct sigaction act; - struct itimerspec its; - - memset(&sev, 0, sizeof(sev)); - sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID; - sev.sigev_notify_thread_id = gettid(); - sev.sigev_signo = SIGRTMIN + 1; - igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0); - -
[Intel-gfx] [PATCH v3 1/3] lib: add igt_dummyload
A lot of igt testcases need some GPU workload to make sure a race window is big enough. Unfortunately having a fixed amount of workload leads to spurious test failures or overtly long runtimes on some fast/slow platforms. This library contains functionality to submit GPU workloads that should consume exactly a specific amount of time. v2 : Add recursive batch feature from Chris v3 : Drop auto-tuned stuff. Add bo dependecy to recursive batch by adding a dummy reloc to the bo as suggested by Ville. Cc: Daniel Vetter Cc: Ville Syrjälä Cc: Chris Wilson Signed-off-by: Abdiel Janulgue --- lib/Makefile.sources | 2 + lib/igt.h| 1 + lib/igt_dummyload.c | 274 +++ lib/igt_dummyload.h | 42 4 files changed, 319 insertions(+) create mode 100644 lib/igt_dummyload.c create mode 100644 lib/igt_dummyload.h diff --git a/lib/Makefile.sources b/lib/Makefile.sources index e8e277b..7fc5ec2 100644 --- a/lib/Makefile.sources +++ b/lib/Makefile.sources @@ -75,6 +75,8 @@ lib_source_list = \ igt_draw.h \ igt_pm.c\ igt_pm.h\ + igt_dummyload.c \ + igt_dummyload.h \ uwildmat/uwildmat.h \ uwildmat/uwildmat.c \ $(NULL) diff --git a/lib/igt.h b/lib/igt.h index d751f24..a0028d5 100644 --- a/lib/igt.h +++ b/lib/igt.h @@ -32,6 +32,7 @@ #include "igt_core.h" #include "igt_debugfs.h" #include "igt_draw.h" +#include "igt_dummyload.h" #include "igt_fb.h" #include "igt_gt.h" #include "igt_kms.h" diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c new file mode 100644 index 000..d37a30b --- /dev/null +++ b/lib/igt_dummyload.c @@ -0,0 +1,274 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "igt.h" +#include "igt_dummyload.h" +#include +#include +#include + +/** + * SECTION:igt_dummyload + * @short_description: Library for submitting GPU workloads + * @title: Dummyload + * @include: igt.h + * + * A lot of igt testcases need some GPU workload to make sure a race window is + * big enough. Unfortunately having a fixed amount of workload leads to + * spurious test failures or overtly long runtimes on some fast/slow platforms. + * This library contains functionality to submit GPU workloads that should + * consume exactly a specific amount of time. + */ + +#define NSEC_PER_SEC 10L + +#define gettid() syscall(__NR_gettid) +#define sigev_notify_thread_id _sigev_un._tid + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + +static void +fill_object(struct drm_i915_gem_exec_object2 *obj, uint32_t gem_handle, + struct drm_i915_gem_relocation_entry *relocs, uint32_t count) +{ + memset(obj, 0, sizeof(*obj)); + obj->handle = gem_handle; + obj->relocation_count = count; + obj->relocs_ptr = (uintptr_t)relocs; +} + +static void +fill_reloc(struct drm_i915_gem_relocation_entry *reloc, + uint32_t gem_handle, uint32_t offset, + uint32_t read_domains, uint32_t write_domains) +{ + reloc->target_handle = gem_handle; + reloc->delta = 0; + reloc->offset = offset * sizeof(uint32_t); + reloc->presumed_offset = 0; + reloc->read_domains = read_domains; + reloc->write_domain = write_domains; +} + + +static uint32_t *batch; + +static uint32_t emit_recursive_batch(int fd, int engine, unsigned dep_handle) +{ + const int gen = intel_gen(intel_get_drm_devid(fd)); + struct drm_i915_gem_exec_object2 obj[2]; + struct d
Re: [Intel-gfx] [PATCH 3/3] igt/kms_flip: Use new igt_spin_batch
On 10/28/2016 07:02 PM, Ville Syrjälä wrote: > On Fri, Oct 28, 2016 at 06:47:26PM +0300, Abdiel Janulgue wrote: >> Cc: Chris Wilson >> Cc: Daniel Vetter >> Signed-off-by: Abdiel Janulgue >> --- >> tests/kms_flip.c | 4 ++-- >> 1 file changed, 2 insertions(+), 2 deletions(-) >> >> diff --git a/tests/kms_flip.c b/tests/kms_flip.c >> index 9829b35..13cb262 100644 >> --- a/tests/kms_flip.c >> +++ b/tests/kms_flip.c >> @@ -866,10 +866,10 @@ static unsigned int run_test_step(struct test_output >> *o) >> o->current_fb_id = !o->current_fb_id; >> >> if (o->flags & TEST_WITH_DUMMY_BCS) >> -emit_dummy_load__bcs(o, 1); >> +igt_spin_batch_wait(drm_fd, 1, I915_EXEC_BLT); >> >> if (o->flags & TEST_WITH_DUMMY_RCS) >> -emit_dummy_load__rcs(o, 1); >> +igt_spin_batch_wait(drm_fd, 1, I915_EXEC_RENDER); > > NAK That's not going to add the required dependency between the load > and the bo used by the test. Right. For these cases would it be more straightforward to stick to the auto-tuned rendercopy/blit load operations on the bo instead of inserting a recursive batch on these situations? Any ideas? >> >> if (o->flags & TEST_FB_RECREATE) >> recreate_fb(o); >> -- >> 2.7.0 >> >> ___ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/3] igt/gem_wait: Use new igt_spin_batch
Cc: Chris Wilson Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/gem_wait.c | 125 --- 1 file changed, 7 insertions(+), 118 deletions(-) diff --git a/tests/gem_wait.c b/tests/gem_wait.c index b4127de..630058c 100644 --- a/tests/gem_wait.c +++ b/tests/gem_wait.c @@ -27,18 +27,6 @@ #include "igt.h" -#include -#include -#include - -#define gettid() syscall(__NR_gettid) -#define sigev_notify_thread_id _sigev_un._tid - -#define LOCAL_I915_EXEC_BSD_SHIFT (13) -#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) - -#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) - static int __gem_wait(int fd, struct drm_i915_gem_wait *w) { int err; @@ -75,14 +63,6 @@ static void invalid_buf(int fd) igt_assert_eq(__gem_wait(fd, &wait), -ENOENT); } -static uint32_t *batch; - -static void sigiter(int sig, siginfo_t *info, void *arg) -{ - *batch = MI_BATCH_BUFFER_END; - __sync_synchronize(); -} - #define MSEC_PER_SEC (1000) #define USEC_PER_SEC (1000 * MSEC_PER_SEC) #define NSEC_PER_SEC (1000 * USEC_PER_SEC) @@ -91,113 +71,26 @@ static void sigiter(int sig, siginfo_t *info, void *arg) #define HANG 2 static void basic(int fd, unsigned engine, unsigned flags) { - const int gen = intel_gen(intel_get_drm_devid(fd)); - struct drm_i915_gem_exec_object2 obj; - struct drm_i915_gem_relocation_entry reloc; - struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_wait wait; - unsigned engines[16]; - unsigned nengine; - int i, timeout; - - nengine = 0; - if (engine == -1) { - for_each_engine(fd, engine) - if (engine) engines[nengine++] = engine; - } else { - igt_require(gem_has_ring(fd, engine)); - engines[nengine++] = engine; - } - igt_require(nengine); - - memset(&execbuf, 0, sizeof(execbuf)); - execbuf.buffers_ptr = (uintptr_t)&obj; - execbuf.buffer_count = 1; - - memset(&obj, 0, sizeof(obj)); - obj.handle = gem_create(fd, 4096); - - obj.relocs_ptr = (uintptr_t)&reloc; - obj.relocation_count = 1; - memset(&reloc, 0, sizeof(reloc)); - - batch = gem_mmap__gtt(fd, obj.handle, 4096, PROT_WRITE); - gem_set_domain(fd, obj.handle, - I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); - - reloc.target_handle = obj.handle; /* recurse */ - reloc.presumed_offset = 0; - reloc.offset = sizeof(uint32_t); - reloc.delta = 0; - reloc.read_domains = I915_GEM_DOMAIN_COMMAND; - reloc.write_domain = 0; - - i = 0; - batch[i] = MI_BATCH_BUFFER_START; - if (gen >= 8) { - batch[i] |= 1 << 8 | 1; - batch[++i] = 0; - batch[++i] = 0; - } else if (gen >= 6) { - batch[i] |= 1 << 8; - batch[++i] = 0; - } else { - batch[i] |= 2 << 6; - batch[++i] = 0; - if (gen < 4) { - batch[i] |= 1; - reloc.delta = 1; - } - } - - for (i = 0; i < nengine; i++) { - execbuf.flags &= ~ENGINE_MASK; - execbuf.flags |= engines[i]; - gem_execbuf(fd, &execbuf); - } + int wait_s = (flags == 0) ? 1 : 0; + wait_s = ((flags & HANG) == 0) ? wait_s : -1; + igt_spin_t spin = igt_spin_batch(fd, wait_s, engine); + int timeout; memset(&wait, 0, sizeof(wait)); - wait.bo_handle = obj.handle; - igt_assert_eq(__gem_wait(fd, &wait), -ETIME); + wait.bo_handle = spin.handle; if (flags & BUSY) { struct timespec tv; timeout = 120; - if ((flags & HANG) == 0) { - *batch = MI_BATCH_BUFFER_END; - __sync_synchronize(); + if ((flags & HANG) == 0) timeout = 1; - } memset(&tv, 0, sizeof(tv)); while (__gem_wait(fd, &wait) == -ETIME) igt_assert(igt_seconds_elapsed(&tv) < timeout); } else { - timer_t timer; - - if ((flags & HANG) == 0) { - struct sigevent sev; - struct sigaction act; - struct itimerspec its; - - memset(&sev, 0, sizeof(sev)); - sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID; - sev.sigev_notify_thread_id = gettid(); - sev.sigev_signo = SIGRTMIN + 1; - igt_assert(timer_create(CLOCK_MONOTONIC, &sev, &timer) == 0); - -
[Intel-gfx] [PATCH 3/3] igt/kms_flip: Use new igt_spin_batch
Cc: Chris Wilson Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/kms_flip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/kms_flip.c b/tests/kms_flip.c index 9829b35..13cb262 100644 --- a/tests/kms_flip.c +++ b/tests/kms_flip.c @@ -866,10 +866,10 @@ static unsigned int run_test_step(struct test_output *o) o->current_fb_id = !o->current_fb_id; if (o->flags & TEST_WITH_DUMMY_BCS) - emit_dummy_load__bcs(o, 1); + igt_spin_batch_wait(drm_fd, 1, I915_EXEC_BLT); if (o->flags & TEST_WITH_DUMMY_RCS) - emit_dummy_load__rcs(o, 1); + igt_spin_batch_wait(drm_fd, 1, I915_EXEC_RENDER); if (o->flags & TEST_FB_RECREATE) recreate_fb(o); -- 2.7.0 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] lib: add igt_dummyload
Generalized from auto-tuned GPU dummy workload in gem_wait and kms_flip v2 : Add recursive batch feature from Chris Cc: Chris Wilson Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- lib/Makefile.sources | 2 + lib/igt.h| 1 + lib/igt_dummyload.c | 613 +++ lib/igt_dummyload.h | 77 +++ 4 files changed, 693 insertions(+) create mode 100644 lib/igt_dummyload.c create mode 100644 lib/igt_dummyload.h diff --git a/lib/Makefile.sources b/lib/Makefile.sources index e8e277b..7fc5ec2 100644 --- a/lib/Makefile.sources +++ b/lib/Makefile.sources @@ -75,6 +75,8 @@ lib_source_list = \ igt_draw.h \ igt_pm.c\ igt_pm.h\ + igt_dummyload.c \ + igt_dummyload.h \ uwildmat/uwildmat.h \ uwildmat/uwildmat.c \ $(NULL) diff --git a/lib/igt.h b/lib/igt.h index d751f24..a0028d5 100644 --- a/lib/igt.h +++ b/lib/igt.h @@ -32,6 +32,7 @@ #include "igt_core.h" #include "igt_debugfs.h" #include "igt_draw.h" +#include "igt_dummyload.h" #include "igt_fb.h" #include "igt_gt.h" #include "igt_kms.h" diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c new file mode 100644 index 000..f7a64b7 --- /dev/null +++ b/lib/igt_dummyload.c @@ -0,0 +1,613 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "igt.h" +#include "igt_dummyload.h" +#include +#include +#include + +/** + * SECTION:igt_dummyload + * @short_description: Library for submitting GPU workloads + * @title: Dummyload + * @include: igt.h + * + * A lot of igt testcases need some GPU workload to make sure a race window is + * big enough. Unfortunately having a fixed amount of workload leads to + * spurious test failures or overtly long runtimes on some fast/slow platforms. + * This library contains functionality to submit GPU workloads that should + * consume exactly a specific amount of time. + */ + +#define USEC_PER_SEC 100L +#define NSEC_PER_SEC 10L + +#define gettid() syscall(__NR_gettid) +#define sigev_notify_thread_id _sigev_un._tid + +#define LOCAL_I915_EXEC_BSD_SHIFT (13) +#define LOCAL_I915_EXEC_BSD_MASK (3 << LOCAL_I915_EXEC_BSD_SHIFT) + +#define ENGINE_MASK (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK) + + +/* Internal data structures to avoid having to pass tons of parameters + * around. */ +struct dummy_info { + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + int drm_fd; + uint32_t buf_handle; + uint32_t buf_stride; + uint32_t buf_tiling; + int fb_width; + int fb_height; +}; + +static void blit_copy(struct intel_batchbuffer *batch, + drm_intel_bo *dst, drm_intel_bo *src, + unsigned int width, unsigned int height, + unsigned int dst_pitch, unsigned int src_pitch) +{ + BLIT_COPY_BATCH_START(0); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + dst_pitch); + OUT_BATCH(0 << 16 | 0); + OUT_BATCH(height << 16 | width); + OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(0 << 16 | 0); + OUT_BATCH(src_pitch); + OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + if (batch->gen >= 6) { + BEGIN_BATCH(3, 0); + OUT_BATCH(XY_SETUP_CLIP_BLT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +static void blit_fill(struct intel_batchbuffe
[Intel-gfx] [RFC v2 i-g-t] Auto-tuned dummyload + recursive batch
A lot of igt testcases need some GPU workload to make sure a race window is big enough. Unfortunately having a fixed amount of workload leads to spurious test failures or overtly long runtimes on some fast/slow platforms. This library contains functionality to submit GPU workloads that should consume exactly a specific amount of time. v2: Add recursive batch feature from Chris and use in gem_wait and kms_flip. I've retained the previous auto-tuned dummy load functions. Let me know if we need to drop those. Abdiel Janulgue (3): lib: add igt_dummyload igt/gem_wait: Use new igt_spin_batch igt/kms_flip: Use new igt_spin_batch lib/Makefile.sources | 2 + lib/igt.h| 1 + lib/igt_dummyload.c | 613 +++ lib/igt_dummyload.h | 77 tests/gem_wait.c | 125 +- tests/kms_flip.c | 4 +- ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api
On 10/13/2016 06:17 PM, Daniel Vetter wrote: > On Thu, Oct 13, 2016 at 10:49:39AM +0100, Chris Wilson wrote: >> On Thu, Oct 13, 2016 at 12:31:13PM +0300, Abdiel Janulgue wrote: >>> >>> >>> On 10/12/2016 03:07 PM, Chris Wilson wrote: >>>> On Wed, Oct 12, 2016 at 02:59:53PM +0300, Abdiel Janulgue wrote: >>>>> Signed-off-by: Abdiel Janulgue >>>>> --- >>>>> tests/gem_wait.c | 77 >>>>> +--- >>>>> 1 file changed, 12 insertions(+), 65 deletions(-) >>>> >>>> We can do so much better than a dummy load here. We can precisely >>>> control how long we want the object to be busy by using a recursive >>>> batch buffer (and terminating that batch at the exact moment we require). >>>> -Chris >>>> >>> Hi Chris, I see you've posted a better solution to gem_wait. I could >>> drop this one and defer to yours instead. So for now, igt_dummyload has >>> dropped to only 1 customer at the moment: kms_flip. Let me know whether >>> it's possible to upstream this dummyload api. >> >> kms_flip would probably be better with a specific load rather than a >> dummy as well. The challenge is whether the flip works given various >> input states of the framebuffers, and the more control we have over >> those inputs the better. > > Oh yeah, this is a pretty sweet idea with a spinning batch that we > manually terminate. Assuming it works everywhere I think this is a much > better approach, and by submitting different workloads we can always put > the delay workload exactly where we want. > > Abdiel, can you pls update the JIRA to instead extract Chris' trick into a > library (pretty sure Chris can help with bikeshedding the interface to > make it all pretty) and then roll that one out? Being able to control the > time used by delay tasks down to jiffies is real sweet. Ok I'll try that. Thanks! -Abdiel > > Also there might be some space to reuse this with Mika's hangcheck stuff, > not sure. > -Daniel > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api
On 10/12/2016 03:07 PM, Chris Wilson wrote: > On Wed, Oct 12, 2016 at 02:59:53PM +0300, Abdiel Janulgue wrote: >> Signed-off-by: Abdiel Janulgue >> --- >> tests/gem_wait.c | 77 >> +--- >> 1 file changed, 12 insertions(+), 65 deletions(-) > > We can do so much better than a dummy load here. We can precisely > control how long we want the object to be busy by using a recursive > batch buffer (and terminating that batch at the exact moment we require). > -Chris > Hi Chris, I see you've posted a better solution to gem_wait. I could drop this one and defer to yours instead. So for now, igt_dummyload has dropped to only 1 customer at the moment: kms_flip. Let me know whether it's possible to upstream this dummyload api. -Abdiel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC i-g-t PATCH 1/3] lib: add igt_dummyload
Generalized from auto-tuned GPU dummy workload in gem_wait and kms_flip Signed-off-by: Abdiel Janulgue --- lib/Makefile.sources | 2 + lib/igt.h| 1 + lib/igt_dummyload.c | 419 +++ lib/igt_dummyload.h | 63 4 files changed, 485 insertions(+) create mode 100644 lib/igt_dummyload.c create mode 100644 lib/igt_dummyload.h diff --git a/lib/Makefile.sources b/lib/Makefile.sources index e8e277b..7fc5ec2 100644 --- a/lib/Makefile.sources +++ b/lib/Makefile.sources @@ -75,6 +75,8 @@ lib_source_list = \ igt_draw.h \ igt_pm.c\ igt_pm.h\ + igt_dummyload.c \ + igt_dummyload.h \ uwildmat/uwildmat.h \ uwildmat/uwildmat.c \ $(NULL) diff --git a/lib/igt.h b/lib/igt.h index d751f24..a0028d5 100644 --- a/lib/igt.h +++ b/lib/igt.h @@ -32,6 +32,7 @@ #include "igt_core.h" #include "igt_debugfs.h" #include "igt_draw.h" +#include "igt_dummyload.h" #include "igt_fb.h" #include "igt_gt.h" #include "igt_kms.h" diff --git a/lib/igt_dummyload.c b/lib/igt_dummyload.c new file mode 100644 index 000..908d839 --- /dev/null +++ b/lib/igt_dummyload.c @@ -0,0 +1,419 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "igt.h" +#include "igt_dummyload.h" +#include + +/** + * SECTION:igt_dummyload + * @short_description: Library for submitting auto-tuned dummy GPU workloads + * @title: Dummyload + * @include: igt.h + * + * A lot of igt testcases need some dummy load to make sure a race window is + * big enough. Unfortunately having a fixed amount of workload leads to + * spurious test failures or overtly long runtimes on some fast/slow platforms. + * This library contains functionality to submit GPU workloads that is + * dynamically tuned to consume a specific amount of time. + */ + +#define USEC_PER_SEC 100L +#define NSEC_PER_SEC 10L + +/* Internal data structures to avoid having to pass tons of parameters + * around. */ +struct dummy_info { + drm_intel_bufmgr *bufmgr; + struct intel_batchbuffer *batch; + int drm_fd; + uint32_t buf_handle; + uint32_t buf_stride; + uint32_t buf_tiling; + int fb_width; + int fb_height; +}; + +static void blit_copy(struct intel_batchbuffer *batch, + drm_intel_bo *dst, drm_intel_bo *src, + unsigned int width, unsigned int height, + unsigned int dst_pitch, unsigned int src_pitch) +{ + BLIT_COPY_BATCH_START(0); + OUT_BATCH((3 << 24) | /* 32 bits */ + (0xcc << 16) | /* copy ROP */ + dst_pitch); + OUT_BATCH(0 << 16 | 0); + OUT_BATCH(height << 16 | width); + OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(0 << 16 | 0); + OUT_BATCH(src_pitch); + OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0); + ADVANCE_BATCH(); + + if (batch->gen >= 6) { + BEGIN_BATCH(3, 0); + OUT_BATCH(XY_SETUP_CLIP_BLT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + ADVANCE_BATCH(); + } +} + +static void blit_fill(struct intel_batchbuffer *batch, drm_intel_bo *dst, + unsigned int width, unsigned int height) +{ + COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA | + XY_COLOR_BLT_WRITE_RGB); + OUT_BATCH((3 << 24) | /* 32 Bit Color */ + (0xF0 << 16) | /* Raster OP copy background register */ + 0);
[Intel-gfx] [RFC i-g-t PATCH 3/3] igt/kms_flip: Use new igt_dummyload api
Signed-off-by: Abdiel Janulgue --- tests/kms_flip.c | 191 +++ 1 file changed, 10 insertions(+), 181 deletions(-) diff --git a/tests/kms_flip.c b/tests/kms_flip.c index 065ad66..93cf391 100644 --- a/tests/kms_flip.c +++ b/tests/kms_flip.c @@ -187,109 +187,6 @@ static unsigned long gettime_us(void) return ts.tv_sec * 100 + ts.tv_nsec / 1000; } -static int calibrate_dummy_load(struct test_output *o, - const char *ring_name, - int (*emit)(struct test_output *o, int limit, int timeout)) -{ - unsigned long start; - int ops = 1; - - start = gettime_us(); - - do { - unsigned long diff; - int ret; - - ret = emit(o, (ops+1)/2, 10); - diff = gettime_us() - start; - - if (ret || diff / USEC_PER_SEC >= 1) - break; - - ops += ops; - } while (ops < 10); - - igt_debug("%s dummy load calibrated: %d operations / second\n", - ring_name, ops); - - return ops; -} - -static void blit_copy(drm_intel_bo *dst, drm_intel_bo *src, - unsigned int width, unsigned int height, - unsigned int dst_pitch, unsigned int src_pitch) -{ - BLIT_COPY_BATCH_START(0); - OUT_BATCH((3 << 24) | /* 32 bits */ - (0xcc << 16) | /* copy ROP */ - dst_pitch); - OUT_BATCH(0 << 16 | 0); - OUT_BATCH(height << 16 | width); - OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH(0 << 16 | 0); - OUT_BATCH(src_pitch); - OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0); - ADVANCE_BATCH(); - - if (batch->gen >= 6) { - BEGIN_BATCH(3, 0); - OUT_BATCH(XY_SETUP_CLIP_BLT_CMD); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } -} - -static int _emit_dummy_load__bcs(struct test_output *o, int limit, int timeout) -{ - int i, ret = 0; - drm_intel_bo *src_bo, *dst_bo, *fb_bo; - struct igt_fb *fb_info = &o->fb_info[o->current_fb_id]; - - igt_require(bufmgr); - - src_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096); - igt_assert(src_bo); - - dst_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096); - igt_assert(dst_bo); - - fb_bo = gem_handle_to_libdrm_bo(bufmgr, drm_fd, "imported", fb_info->gem_handle); - igt_assert(fb_bo); - - for (i = 0; i < limit; i++) { - blit_copy(dst_bo, src_bo, - 2048, 2048, - 2048*4, 2048*4); - - igt_swap(src_bo, dst_bo); - } - blit_copy(fb_bo, src_bo, - min(o->fb_width, 2048), min(o->fb_height, 2048), - fb_info->stride, 2048*4); - intel_batchbuffer_flush(batch); - - if (timeout > 0) - ret = drm_intel_gem_bo_wait(fb_bo, timeout * NSEC_PER_SEC); - - drm_intel_bo_unreference(src_bo); - drm_intel_bo_unreference(dst_bo); - drm_intel_bo_unreference(fb_bo); - - return ret; -} - -static void emit_dummy_load__bcs(struct test_output *o, int seconds) -{ - static int ops_per_sec; - - if (ops_per_sec == 0) - ops_per_sec = calibrate_dummy_load(o, "bcs", - _emit_dummy_load__bcs); - - _emit_dummy_load__bcs(o, seconds * ops_per_sec, 0); -} - static void emit_fence_stress(struct test_output *o) { const int num_fences = gem_available_fences(drm_fd); @@ -334,82 +231,6 @@ static void emit_fence_stress(struct test_output *o) free(exec); } -static int _emit_dummy_load__rcs(struct test_output *o, int limit, int timeout) -{ - const struct igt_fb *fb_info = &o->fb_info[o->current_fb_id]; - igt_render_copyfunc_t copyfunc; - struct igt_buf sb[3], *src, *dst, *fb; - int i, ret = 0; - - igt_require(bufmgr); - - copyfunc = igt_get_render_copyfunc(devid); - if (copyfunc == NULL) - return _emit_dummy_load__bcs(o, limit, timeout); - - sb[0].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096); - igt_assert(sb[0].bo); - sb[0].size = sb[0].bo->size; - sb[0].tiling = I915_TILING_NONE; - sb[0].data = NULL; - sb[0].num_tiles = sb[0].bo->size; - sb[0].stride = 4 * 2048; - - sb[1].bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096); - igt_assert(sb[1].bo); - sb[1].size = sb[1].bo->size; - sb[1].tiling = I915_TILING_NONE; - sb[1].data = NULL; - sb[1].num_tiles = sb[1].bo->size; -
[Intel-gfx] [RFC i-g-t] Extract autotuned dummy load into lib
A lot of igt testcases need some dummy load to make sure a race window is big enough. Unfortunately having a fixed amount of workload leads to spurious test failures or overtly long runtimes on some fast/slow platforms. This library contains functionality to submit GPU workloads that is dynamically tuned to consume a specific amount of time. This functionality is generalized to lib from existing features in gem_wait and kms_flip. In the future, we could update test cases that could benefit from auto-tuned dummy workloads to use this new api. Abdiel Janulgue (3): lib: add igt_dummyload igt/gem_wait: Use new igt_dummyload api igt/kms_flip: Use new igt_dummyload api lib/Makefile.sources | 2 + lib/igt.h| 1 + lib/igt_dummyload.c | 419 +++ lib/igt_dummyload.h | 63 ++ tests/gem_wait.c | 77 ++- tests/kms_flip.c | 191 +- 6 files changed, 507 insertions(+), 246 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC i-g-t PATCH 2/3] igt/gem_wait: Use new igt_dummyload api
Signed-off-by: Abdiel Janulgue --- tests/gem_wait.c | 77 +--- 1 file changed, 12 insertions(+), 65 deletions(-) diff --git a/tests/gem_wait.c b/tests/gem_wait.c index 461efdb..24a5f5e 100644 --- a/tests/gem_wait.c +++ b/tests/gem_wait.c @@ -54,36 +54,6 @@ #define BUF_PAGES ((8<<20)>>12) drm_intel_bo *dst, *dst2; -/* returns time diff in milliseconds */ -static int64_t -do_time_diff(struct timespec *end, struct timespec *start) -{ - int64_t ret; - ret = (MSEC_PER_SEC * difftime(end->tv_sec, start->tv_sec)) + - ((end->tv_nsec/NSEC_PER_MSEC) - (start->tv_nsec/NSEC_PER_MSEC)); - return ret; -} - -static void blt_color_fill(struct intel_batchbuffer *batch, - drm_intel_bo *buf, - const unsigned int pages) -{ - const unsigned short height = pages/4; - const unsigned short width = 4096; - - COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA | - XY_COLOR_BLT_WRITE_RGB); - OUT_BATCH((3 << 24) | /* 32 Bit Color */ - (0xF0 << 16) | /* Raster OP copy background register */ - 0); /* Dest pitch is 0 */ - OUT_BATCH(0); - OUT_BATCH(width << 16 | - height); - OUT_RELOC_FENCED(buf, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH(rand()); /* random pattern */ - ADVANCE_BATCH(); -} - static void render_timeout(int fd) { drm_intel_bufmgr *bufmgr; @@ -91,10 +61,11 @@ static void render_timeout(int fd) int64_t timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC; int64_t negative_timeout = -1; int ret; + const unsigned short height = BUF_PAGES/4; + const unsigned short width = 4096; const bool do_signals = true; /* signals will seem to make the operation * use less process CPU time */ - bool done = false; - int i, iter = 1; + int iter = 1; igt_skip_on_simulation(); @@ -112,30 +83,9 @@ static void render_timeout(int fd) /* Figure out a rough number of fills required to consume 1 second of * GPU work. */ - do { - struct timespec start, end; - long diff; - -#ifndef CLOCK_MONOTONIC_RAW -#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC -#endif - - igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &start) == 0); - for (i = 0; i < iter; i++) - blt_color_fill(batch, dst, BUF_PAGES); - intel_batchbuffer_flush(batch); - drm_intel_bo_wait_rendering(dst); - igt_assert(clock_gettime(CLOCK_MONOTONIC_RAW, &end) == 0); - - diff = do_time_diff(&end, &start); - igt_assert(diff >= 0); - - if ((diff / MSEC_PER_SEC) > ENOUGH_WORK_IN_SECONDS) - done = true; - else - iter <<= 1; - } while (!done && iter < 100); - + iter = igt_calibrate_dummy_load(bufmgr, batch, fd, + dst->handle, 0, + width, height, 1, IGT_DUMMY_BLIT_FILL); igt_assert_lt(iter, 100); igt_debug("%d iters is enough work\n", iter); @@ -146,10 +96,9 @@ static void render_timeout(int fd) /* We should be able to do half as much work in the same amount of time, * but because we might schedule almost twice as much as required, we * might accidentally time out. Hence add some fudge. */ - for (i = 0; i < iter/3; i++) - blt_color_fill(batch, dst2, BUF_PAGES); - intel_batchbuffer_flush(batch); + igt_emit_dummy_load(bufmgr, batch, fd, dst2->handle, 0, + width, height, iter/3, 0, IGT_DUMMY_BLIT_FILL); igt_assert(gem_bo_busy(fd, dst2->handle) == true); igt_assert_eq(gem_wait(fd, dst2->handle, &timeout), 0); @@ -168,10 +117,9 @@ static void render_timeout(int fd) /* Now check that we correctly time out, twice the auto-tune load should * be good enough. */ timeout = ENOUGH_WORK_IN_SECONDS * NSEC_PER_SEC; - for (i = 0; i < iter*2; i++) - blt_color_fill(batch, dst2, BUF_PAGES); - intel_batchbuffer_flush(batch); + igt_emit_dummy_load(bufmgr, batch, fd, dst2->handle, 0, + width, height, iter*2, 0, IGT_DUMMY_BLIT_FILL); ret = gem_wait(fd, dst2->handle, &timeout); igt_assert_eq(ret, -ETIME); @@ -186,10 +134,9 @@ static void render_timeout(int fd) /* Now check that we can pass negative (infinite) timeouts. */ negative_timeout = -1; - for (i = 0; i < iter; i++) - blt_
Re: [Intel-gfx] [PATCH v4] drm/i915: Adjust size of PIPE_CONTROL used for gen8 render seqno write
On 04/12/2016 04:51 PM, Michał Winiarski wrote: > We started to use PIPE_CONTROL to write render ring seqno in order to > combat seqno write vs interrupt generation problems. This was introduced > by commit 7c17d377374d ("drm/i915: Use ordered seqno write interrupt > generation on gen8+ execlists"). > > On gen8+ size of PIPE_CONTROL with Post Sync Operation should be > 6 dwords. When we're using older 5-dword variant it's possible to > observe inconsistent values written by PIPE_CONTROL with Post > Sync Operation from user batches, resulting in rendering corruptions. > > v2: Fix BAT failures > v3: Comments on alignment and thrashing high dword of seqno (Chris) > v4: Updated commit msg (Mika) > > Testcase: igt/gem_pipe_control_store_loop/*-qword-write > Issue: VIZ-7393 > Cc: sta...@vger.kernel.org > Cc: Chris Wilson > Cc: Mika Kuoppala > Cc: Abdiel Janulgue Tested-by: Abdiel Janulgue > Signed-off-by: Michał Winiarski > --- > drivers/gpu/drm/i915/intel_lrc.c | 10 -- > 1 file changed, 8 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/i915/intel_lrc.c > b/drivers/gpu/drm/i915/intel_lrc.c > index 0d6dc5e..30abe53 100644 > --- a/drivers/gpu/drm/i915/intel_lrc.c > +++ b/drivers/gpu/drm/i915/intel_lrc.c > @@ -1945,15 +1945,18 @@ static int gen8_emit_request_render(struct > drm_i915_gem_request *request) > struct intel_ringbuffer *ringbuf = request->ringbuf; > int ret; > > - ret = intel_logical_ring_begin(request, 6 + WA_TAIL_DWORDS); > + ret = intel_logical_ring_begin(request, 8 + WA_TAIL_DWORDS); > if (ret) > return ret; > > + /* We're using qword write, seqno should be aligned to 8 bytes. */ > + BUILD_BUG_ON(I915_GEM_HWS_INDEX & 1); > + > /* w/a for post sync ops following a GPGPU operation we >* need a prior CS_STALL, which is emitted by the flush >* following the batch. >*/ > - intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(5)); > + intel_logical_ring_emit(ringbuf, GFX_OP_PIPE_CONTROL(6)); > intel_logical_ring_emit(ringbuf, > (PIPE_CONTROL_GLOBAL_GTT_IVB | >PIPE_CONTROL_CS_STALL | > @@ -1961,7 +1964,10 @@ static int gen8_emit_request_render(struct > drm_i915_gem_request *request) > intel_logical_ring_emit(ringbuf, hws_seqno_address(request->engine)); > intel_logical_ring_emit(ringbuf, 0); > intel_logical_ring_emit(ringbuf, i915_gem_request_get_seqno(request)); > + /* We're thrashing one dword of HWS. */ > + intel_logical_ring_emit(ringbuf, 0); > intel_logical_ring_emit(ringbuf, MI_USER_INTERRUPT); > + intel_logical_ring_emit(ringbuf, MI_NOOP); > return intel_logical_ring_advance_and_submit(request); > } > > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
On 07/06/2015 11:28 AM, Daniel Vetter wrote: > On Thu, Jul 02, 2015 at 11:15:40AM +0100, Chris Wilson wrote: >> On Wed, Jul 01, 2015 at 10:12:23AM +0300, Abdiel Janulgue wrote: >>> Ensures that the batch buffer is executed by the resource streamer >>> >>> v2: Don't skip 1<<15 for the exec flags (Jani Nikula) >>> v3: Use HAS_RESOURCE_STREAMER macro for execbuf validation (Chris Wilson) >>> >>> Testcase: igt/gem_exec_params >>> Cc: Jani Nikula >>> Reviewed-by: Chris Wilson >>> Signed-off-by: Abdiel Janulgue >> >> This no longer applies (unrecognised base). Honestly, I would prefer 4&5 >> squashed together, or 4 after 5 so that we do not declare >> HAS_RESOURCE_STREAMER before we accept the RS execbuf. >> >> Minor bit of patch reordering, but the code in 4 looks ok, so >> Reviewed-by: Chris Wilson # for 4/5 > > Yeah I just reordered 5 to go before 4. Series merged to dinq, thanks. > Abdiel can you pls push the corresponding igts (or ask Thomas to do it for > you)? Thank you! I don't have commit rights in igt, but here is the patch: http://lists.freedesktop.org/archives/intel-gfx/2015-June/068799.html -Abdiel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer v2: Don't skip 1<<15 for the exec flags (Jani Nikula) v3: Use HAS_RESOURCE_STREAMER macro for execbuf validation (Chris Wilson) Testcase: igt/gem_exec_params Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++ include/uapi/drm/i915_drm.h| 7 ++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 600db74..83577c6 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1490,6 +1490,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!HAS_RESOURCE_STREAMER(dev)) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 51137bd..e7c29f1 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -766,7 +766,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v4 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
This will let userspace know whether Resource Streamer is supported in the kernel. v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after I915_PARAM_HAS_GPU_RESET. v3: Only advertise RS support for hardware that supports it. v4: Add HAS_RESOURCE_STREAMER() macro (Chris) Suggested-by: Kenneth Graunke Cc: Kenneth Graunke Cc: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ drivers/gpu/drm/i915/i915_drv.h | 3 +++ include/uapi/drm/i915_drm.h | 1 + 3 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index c5349fa..a42f165 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data, value = i915.enable_hangcheck && intel_has_gpu_reset(dev); break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = HAS_RESOURCE_STREAMER(dev); + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index ea9caf2..9968663 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2520,6 +2520,9 @@ struct drm_i915_cmd_table { #define HAS_CSR(dev) (IS_SKYLAKE(dev)) +#define HAS_RESOURCE_STREAMER(dev) (IS_HASWELL(dev) || \ + INTEL_INFO(dev)->gen >= 8) + #define INTEL_PCH_DEVICE_ID_MASK 0xff00 #define INTEL_PCH_IBX_DEVICE_ID_TYPE 0x3b00 #define INTEL_PCH_CPT_DEVICE_ID_TYPE 0x1c00 diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f88cc1c..51137bd 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
On 06/24/2015 09:30 AM, Abdiel Janulgue wrote: > > > On 06/16/2015 03:41 PM, Abdiel Janulgue wrote: >> This will let userspace know whether Resource Streamer is supported >> in the kernel. >> >> v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after >> I915_PARAM_HAS_GPU_RESET. >> v3: Only advertise RS support for hardware that supports it. > > Ping. Any status on this one? Chris? > >> >> Suggested-by: Kenneth Graunke >> Cc: Kenneth Graunke >> Signed-off-by: Abdiel Janulgue >> --- >> drivers/gpu/drm/i915/i915_dma.c | 3 +++ >> include/uapi/drm/i915_drm.h | 1 + >> 2 files changed, 4 insertions(+) >> >> diff --git a/drivers/gpu/drm/i915/i915_dma.c >> b/drivers/gpu/drm/i915/i915_dma.c >> index 88795d2..4f55f51 100644 >> --- a/drivers/gpu/drm/i915/i915_dma.c >> +++ b/drivers/gpu/drm/i915/i915_dma.c >> @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void >> *data, >> i915.reset && >> intel_has_gpu_reset(dev); >> break; >> +case I915_PARAM_HAS_RESOURCE_STREAMER: >> +value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8; >> + break; >> default: >> DRM_DEBUG("Unknown parameter %d\n", param->param); >> return -EINVAL; >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index f88cc1c..51137bd 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { >> #define I915_PARAM_SUBSLICE_TOTAL33 >> #define I915_PARAM_EU_TOTAL 34 >> #define I915_PARAM_HAS_GPU_RESET 35 >> +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 >> >> typedef struct drm_i915_getparam { >> int param; >> > ___ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
On 06/16/2015 03:41 PM, Abdiel Janulgue wrote: > This will let userspace know whether Resource Streamer is supported > in the kernel. > > v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after > I915_PARAM_HAS_GPU_RESET. > v3: Only advertise RS support for hardware that supports it. Ping. Any status on this one? > > Suggested-by: Kenneth Graunke > Cc: Kenneth Graunke > Signed-off-by: Abdiel Janulgue > --- > drivers/gpu/drm/i915/i915_dma.c | 3 +++ > include/uapi/drm/i915_drm.h | 1 + > 2 files changed, 4 insertions(+) > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c > index 88795d2..4f55f51 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void > *data, > i915.reset && > intel_has_gpu_reset(dev); > break; > + case I915_PARAM_HAS_RESOURCE_STREAMER: > + value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index f88cc1c..51137bd 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_SUBSLICE_TOTAL 33 > #define I915_PARAM_EU_TOTAL 34 > #define I915_PARAM_HAS_GPU_RESET 35 > +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 > > typedef struct drm_i915_getparam { > int param; > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
This will let userspace know whether Resource Streamer is supported in the kernel. v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after I915_PARAM_HAS_GPU_RESET. v3: Only advertise RS support for hardware that supports it. Suggested-by: Kenneth Graunke Cc: Kenneth Graunke Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 88795d2..4f55f51 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void *data, i915.reset && intel_has_gpu_reset(dev); break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = IS_HASWELL(dev) || INTEL_INFO(dev)->gen >= 8; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f88cc1c..51137bd 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2 resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
This will let userspace know whether Resource Streamer is supported in the kernel. v2: Update I915_PARAM_HAS_RESOURCE_STREAMER so it's after I915_PARAM_HAS_GPU_RESET. Suggested-by: Kenneth Graunke Cc: Kenneth Graunke Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 88795d2..e7ad30f 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -168,6 +168,9 @@ static int i915_getparam(struct drm_device *dev, void *data, i915.reset && intel_has_gpu_reset(dev); break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index f88cc1c..51137bd 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 #define I915_PARAM_HAS_GPU_RESET35 +#define I915_PARAM_HAS_RESOURCE_STREAMER 36 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 5/5] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer v2: Don't skip 1<<15 for the exec flags (Jani Nikula) Testcase: igt/gem_exec_params Cc: Jani Nikula Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++ include/uapi/drm/i915_drm.h| 7 ++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..ae064f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1485,6 +1485,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 40ab1c8..debccd34 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -761,7 +761,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 4/5] drm/i915: add I915_PARAM_HAS_RESOURCE_STREAMER to i915_getparam
This will let userspace know whether Resource Streamer is supported in the kernel. Suggested-by: Kenneth Graunke Cc: Kenneth Graunke Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_dma.c | 3 +++ include/uapi/drm/i915_drm.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 68e0c85..10d4ead 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -163,6 +163,9 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..40ab1c8 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_REVISION 32 #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_RESOURCE_STREAMER 35 typedef struct drm_i915_getparam { int param; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 3/5] drm/i915: Enable resource streamer on Execlists
GEN8 and above uses Execlists by default instead of the legacy ringbuffer for batch execution. This patch enables the resource streamer bits when required. Patch is based on the initial work by Minu Mathai This version also adds the required bits to enable GEN8 Resource Streamer context save and restore for Execlists. Cc: ville.syrj...@linux.intel.com Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/intel_lrc.c | 8 ++-- drivers/gpu/drm/i915/intel_lrc.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..b015e96 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1172,7 +1172,10 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? +MI_BATCH_RESOURCE_STREAMER : 0)); intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); intel_logical_ring_emit(ringbuf, MI_NOOP); @@ -1726,7 +1729,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); reg_state[CTX_CONTEXT_CONTROL+1] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index adb731e4..de6087a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -32,6 +32,7 @@ #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) +#define CTX_CTRL_RS_CTX_ENABLE(1 << 1) #define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) #define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 2/5] drm/i915: Enable Resource Streamer state save/restore on MI_SET_CONTEXT
Also clarify comments on context size that the extra state for Resource Streamer is included. v2: Don't remove the extended save/restore enabled for older platforms. (Ville) Use new MI_SET_CONTEXT defines for HSW RS save/restore state instead of extended save/restore. (Daniel) Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 5 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c4..a29dbcd 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring, } /* These flags are for resource streamer on HSW+ */ - if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) + if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8) + flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); + else if (INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 238bb25..2b1321d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -316,6 +316,8 @@ #define MI_RESTORE_EXT_STATE_EN (1<<2) #define MI_FORCE_RESTORE (1<<1) #define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN(1<<2) #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ @@ -2498,7 +2500,8 @@ enum skl_disp_power_wells { * valid. Now, docs explain in dwords what is in the context object. The full * size is 70720 bytes, however, the power context and execlist context will * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* Same as Haswell, but 72064 bytes now. */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 1/5] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for enabling the resource streamer on the legacy ringbuffer for HSW and GEN8. Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..715cb2a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? +MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? +MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t v3] tests/gem_exec_params: check invalid flags for Resource Streamer
Make sure resource streamer flags works only in correct ring in addition to checking next flag after the RS boundary fails. v2: Make sure we reject RS on pre-hsw. v3: Don't skip 1<<15 for the exec flags (Jani Nikula) Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/gem_exec_params.c | 27 ++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 54f0dc3..e9c13a4 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -48,6 +48,7 @@ #define LOCAL_I915_EXEC_BSD_MASK (3<<13) #define LOCAL_I915_EXEC_BSD_RING1 (1<<13) #define LOCAL_I915_EXEC_BSD_RING2 (2<<13) +#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<15) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; @@ -220,7 +221,7 @@ igt_main /* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle */ igt_subtest("invalid-flag") { - execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1); + execbuf.flags = I915_EXEC_RENDER | (LOCAL_I915_EXEC_RESOURCE_STREAMER << 1); RUN_FAIL(EINVAL); } @@ -234,6 +235,30 @@ igt_main execbuf.num_cliprects = 0; } + igt_subtest("rs-invalid-on-bsd-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BSD | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-blt-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BLT | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-vebox-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_VEBOX | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-gen") { + igt_require(!IS_HASWELL(devid) && intel_gen(devid) < 8); + execbuf.flags = I915_EXEC_RENDER | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + #define DIRT(name) \ igt_subtest(#name "-dirt") { \ execbuf.flags = 0; \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START
On 06/08/2015 07:10 PM, Ville Syrjälä wrote: > On Mon, Jun 08, 2015 at 01:04:07PM +0300, Abdiel Janulgue wrote: >> Adds support for executing the resource streamer on BDW and HSW >> >> v2: Add support for Execlists (Minu Mathai ) >> >> Reviewed-by: Chris Wilson >> Signed-off-by: Abdiel Janulgue >> --- >> drivers/gpu/drm/i915/i915_reg.h | 1 + >> drivers/gpu/drm/i915/intel_lrc.c| 4 +++- >> drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++-- >> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + >> 4 files changed, 11 insertions(+), 3 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_reg.h >> b/drivers/gpu/drm/i915/i915_reg.h >> index b522eb6..238bb25 100644 >> --- a/drivers/gpu/drm/i915/i915_reg.h >> +++ b/drivers/gpu/drm/i915/i915_reg.h >> @@ -356,6 +356,7 @@ >> #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) >> #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on >> gen4 */ >> #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) >> +#define MI_BATCH_RESOURCE_STREAMER (1<<10) >> >> #define MI_PREDICATE_SRC0 (0x2400) >> #define MI_PREDICATE_SRC1 (0x2408) >> diff --git a/drivers/gpu/drm/i915/intel_lrc.c >> b/drivers/gpu/drm/i915/intel_lrc.c >> index fcb074b..3b168f6 100644 >> --- a/drivers/gpu/drm/i915/intel_lrc.c >> +++ b/drivers/gpu/drm/i915/intel_lrc.c >> @@ -1172,7 +1172,9 @@ static int gen8_emit_bb_start(struct intel_ringbuffer >> *ringbuf, >> return ret; >> >> /* FIXME(BDW): Address space and security selectors. */ >> -intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | >> (ppgtt<<8)); >> +intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | >> +(ppgtt<<8) | (I915_DISPATCH_RS ? > > That doesn't look right. Yay.. Didn't catch these since this path never gets executed under GEN8 anyway which uses execlist not legacy batch buffer execution. Better remove this then. > >> + MI_BATCH_RESOURCE_STREAMER : 0)); >> intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); >> intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); >> intel_logical_ring_emit(ringbuf, MI_NOOP); >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c >> b/drivers/gpu/drm/i915/intel_ringbuffer.c >> index 441e250..715cb2a 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c >> @@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs >> *ring, >> return ret; >> >> /* FIXME(BDW): Address space and security selectors. */ >> -intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); >> +intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | >> +(dispatch_flags & I915_DISPATCH_RS ? >> + MI_BATCH_RESOURCE_STREAMER : 0)); >> intel_ring_emit(ring, lower_32_bits(offset)); >> intel_ring_emit(ring, upper_32_bits(offset)); >> intel_ring_emit(ring, MI_NOOP); >> @@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs >> *ring, >> intel_ring_emit(ring, >> MI_BATCH_BUFFER_START | >> (dispatch_flags & I915_DISPATCH_SECURE ? >> - 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); >> + 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | >> +(dispatch_flags & I915_DISPATCH_RS ? >> + MI_BATCH_RESOURCE_STREAMER : 0)); >> /* bit0-7 is the length on GEN6+ */ >> intel_ring_emit(ring, offset); >> intel_ring_advance(ring); >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h >> b/drivers/gpu/drm/i915/intel_ringbuffer.h >> index c761fe0..3521bc0 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h >> @@ -167,6 +167,7 @@ struct intel_engine_cs { >> unsigned dispatch_flags); >> #define I915_DISPATCH_SECURE 0x1 >> #define I915_DISPATCH_PINNED 0x2 >> +#define I915_DISPATCH_RS 0x4 >> void(*cleanup)(struct intel_engine_cs *ring); >> >> /* GEN8 signal/wait table - never trust comments! >> -- >> 1.9.1 >> >> ___ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v3
terribly sorry for the typos: On 06/08/2015 01:04 PM, Abdiel Janulgue wrote: > > Although most of this patches were alread reviewed, I am resending them *already > due to additional changes suggested by Jani Nikula. In addition > Mesa folks want RS to be working with hiccups on GEN8 as well so *without ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer v2: Don't skip 1<<15 for the exec flags (Jani Nikula). Add I915_PARAM_HAS_RESOURCE_STREAMER to check if kernel has RS support baked in (Kenneth Graunke). Testcase: igt/gem_exec_params Cc: Jani Nikula Cc: kenn...@whitecape.org Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++ include/uapi/drm/i915_drm.h| 8 +++- 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 68e0c85..10d4ead 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -163,6 +163,9 @@ static int i915_getparam(struct drm_device *dev, void *data, if (!value) return -ENODEV; break; + case I915_PARAM_HAS_RESOURCE_STREAMER: + value = 1; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..ae064f0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1485,6 +1485,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..debccd34 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_REVISION 32 #define I915_PARAM_SUBSLICE_TOTAL 33 #define I915_PARAM_EU_TOTAL 34 +#define I915_PARAM_HAS_RESOURCE_STREAMER 35 typedef struct drm_i915_getparam { int param; @@ -760,7 +761,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<15) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/3] drm/i915: Enable Resource Streamer state save/restore
Also clarify comments on context size that the extra state for Resource Streamer is included. v2: Don't remove the extended save/restore enabled for older platforms. (Ville) Use new MI_SET_CONTEXT defines for HSW RS save/restore state instead of extended save/restore. (Daniel) v3: Add RS save restore for GEN8 Cc: ville.syrj...@linux.intel.com Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 5 - drivers/gpu/drm/i915/intel_lrc.c| 3 ++- drivers/gpu/drm/i915/intel_lrc.h| 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c4..a29dbcd 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring, } /* These flags are for resource streamer on HSW+ */ - if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) + if (IS_HASWELL(ring->dev) || INTEL_INFO(ring->dev)->gen >= 8) + flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); + else if (INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 238bb25..2b1321d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -316,6 +316,8 @@ #define MI_RESTORE_EXT_STATE_EN (1<<2) #define MI_FORCE_RESTORE (1<<1) #define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN(1<<2) #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ @@ -2498,7 +2500,8 @@ enum skl_disp_power_wells { * valid. Now, docs explain in dwords what is in the context object. The full * size is 70720 bytes, however, the power context and execlist context will * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* Same as Haswell, but 72064 bytes now. */ diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 3b168f6..50af986 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1728,7 +1728,8 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o reg_state[CTX_CONTEXT_CONTROL] = RING_CONTEXT_CONTROL(ring); reg_state[CTX_CONTEXT_CONTROL+1] = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH | - CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); reg_state[CTX_RING_HEAD] = RING_HEAD(ring->mmio_base); reg_state[CTX_RING_HEAD+1] = 0; reg_state[CTX_RING_TAIL] = RING_TAIL(ring->mmio_base); diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h index adb731e4..de6087a 100644 --- a/drivers/gpu/drm/i915/intel_lrc.h +++ b/drivers/gpu/drm/i915/intel_lrc.h @@ -32,6 +32,7 @@ #define RING_CONTEXT_CONTROL(ring) ((ring)->mmio_base+0x244) #define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) #define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) +#define CTX_CTRL_RS_CTX_ENABLE(1 << 1) #define RING_CONTEXT_STATUS_BUF(ring) ((ring)->mmio_base+0x370) #define RING_CONTEXT_STATUS_PTR(ring) ((ring)->mmio_base+0x3a0) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v3
Hi, Although most of this patches were alread reviewed, I am resending them due to additional changes suggested by Jani Nikula. In addition Mesa folks want RS to be working with hiccups on GEN8 as well so I added the necessary support for that platform as well. Changes since last posting: - Make RS context save and restore work properly on GEN8 - Add I915_PARAM_HAS_RESOURCE_STREAMER params. [PATCH 1/3] drm/i915: Enable resource streamer bits on [PATCH 2/3] drm/i915: Enable Resource Streamer state save/restore [PATCH 3/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW and HSW v2: Add support for Execlists (Minu Mathai ) Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c| 4 +++- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++-- drivers/gpu/drm/i915/intel_ringbuffer.h | 1 + 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..3b168f6 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1172,7 +1172,9 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | + (ppgtt<<8) | (I915_DISPATCH_RS ? + MI_BATCH_RESOURCE_STREAMER : 0)); intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); intel_logical_ring_emit(ringbuf, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..715cb2a 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,9 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? +MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2410,9 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? +MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 2/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW and HSW v2: Add support for Execlists (Minu Mathai ) Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c| 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 -- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..d523494 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); intel_logical_ring_emit(ringbuf, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..9045144 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer Testcase: igt/gem_exec_params Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..8a0abbb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 " + "and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..a4c1a5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<16) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH resend 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW
Also clarify comments on context size that the extra state for Resource Streamer is included. v2: Don't remove the extended save/restore enabled for older platforms. (Ville) Use new MI_SET_CONTEXT defines for HSW RS save/restore state instead of extended save/restore. (Daniel) Suggested-by: Chris Wilson Reviewed-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +++- drivers/gpu/drm/i915/i915_reg.h | 5 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c4..1a521dd 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -509,7 +509,9 @@ mi_set_context(struct intel_engine_cs *ring, } /* These flags are for resource streamer on HSW+ */ - if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) + if (IS_HASWELL(ring->dev)) + flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN); + else if (INTEL_INFO(ring->dev)->gen < 8) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 238bb25..2b1321d 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -316,6 +316,8 @@ #define MI_RESTORE_EXT_STATE_EN (1<<2) #define MI_FORCE_RESTORE (1<<1) #define MI_RESTORE_INHIBIT (1<<0) +#define HSW_MI_RS_SAVE_STATE_EN (1<<3) +#define HSW_MI_RS_RESTORE_STATE_EN(1<<2) #define MI_SEMAPHORE_SIGNALMI_INSTR(0x1b, 0) /* GEN8+ */ #define MI_SEMAPHORE_TARGET(engine) ((engine)<<15) #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */ @@ -2498,7 +2500,8 @@ enum skl_disp_power_wells { * valid. Now, docs explain in dwords what is in the context object. The full * size is 70720 bytes, however, the power context and execlist context will * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* Same as Haswell, but 72064 bytes now. */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v2] tests/gem_exec_params: check invalid flags for Resource Streamer
Make sure resource streamer flags works only in correct ring in addition to checking next flag after the RS boundary fails. v2: Make sure we reject RS on pre-hsw. Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/gem_exec_params.c | 27 ++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 54f0dc3..f374226 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -48,6 +48,7 @@ #define LOCAL_I915_EXEC_BSD_MASK (3<<13) #define LOCAL_I915_EXEC_BSD_RING1 (1<<13) #define LOCAL_I915_EXEC_BSD_RING2 (2<<13) +#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<16) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; @@ -220,7 +221,7 @@ igt_main /* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle */ igt_subtest("invalid-flag") { - execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1); + execbuf.flags = I915_EXEC_RENDER | (LOCAL_I915_EXEC_RESOURCE_STREAMER << 1); RUN_FAIL(EINVAL); } @@ -234,6 +235,30 @@ igt_main execbuf.num_cliprects = 0; } + igt_subtest("rs-invalid-on-bsd-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BSD | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-blt-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BLT | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-vebox-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_VEBOX | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-gen") { + igt_require(!IS_HASWELL(devid) && intel_gen(devid) < 8); + execbuf.flags = I915_EXEC_RENDER | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + #define DIRT(name) \ igt_subtest(#name "-dirt") { \ execbuf.flags = 0; \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
On 05/18/2015 05:55 PM, Chris Wilson wrote: > On Mon, May 18, 2015 at 11:31:54AM +0300, Abdiel Janulgue wrote: >> Ensures that the batch buffer is executed by the resource streamer >> >> Signed-off-by: Abdiel Janulgue > > 1-3: > Reviewed-by: Chris Wilson > > Now all you have to do is satisfy Daniel with a few igt. > -Chris > Thanks for the review! :) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW
On 05/19/2015 11:26 AM, Daniel Vetter wrote: > On Tue, May 19, 2015 at 09:58:52AM +0300, Abdiel Janulgue wrote: >> >> >> On 05/18/2015 07:07 PM, Ville Syrjälä wrote: >>> On Mon, May 18, 2015 at 04:41:51PM +0100, Chris Wilson wrote: >>>> On Mon, May 18, 2015 at 06:36:18PM +0300, Ville Syrjälä wrote: >>>>> On Mon, May 18, 2015 at 11:31:56AM +0300, Abdiel Janulgue wrote: >>>>>> Also clarify comments on context size that the extra state for >>>>>> Resource Streamer is included. >>>>>> >>>>>> Suggested-by: Chris Wilson >>>>>> Signed-off-by: Abdiel Janulgue >>>>>> --- >>>>>> drivers/gpu/drm/i915/i915_gem_context.c | 2 +- >>>>>> drivers/gpu/drm/i915/i915_reg.h | 3 ++- >>>>>> 2 files changed, 3 insertions(+), 2 deletions(-) >>>>>> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >>>>>> b/drivers/gpu/drm/i915/i915_gem_context.c >>>>>> index f3e84c4..1db107a 100644 >>>>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c >>>>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >>>>>> @@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring, >>>>>> } >>>>>> >>>>>> /* These flags are for resource streamer on HSW+ */ >>>>>> -if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) >>>>>> +if (IS_HASWELL(ring->dev)) >>>>>> flags |= (MI_SAVE_EXT_STATE_EN | >>>>>> MI_RESTORE_EXT_STATE_EN); >>>>> >>>>> I don't get it. Previously we told the hardware to save the extended >>>>> context on !hsw, and now we don't. That doesn't seem correct to me. >>>> >>>> We don't use the extended state elsewhere. >>> >>> Umm. On SNB at least 3DSTATE_POLY_STIPPLE_PATTERN seems to be part of >>> the extended state, and on IVB/VLV SOL state is there. Mesa uses all of >>> that. >>> >>>> I'd always been dubious of >>>> the origins/intentions of this line of code since it claims only to be >>>> for enabling RS on HSW... >>>> >>>> i.e. commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2 >>>> Author: Ben Widawsky >>>> Date: Mon Aug 18 10:35:28 2014 -0700 >>>> >>>> drm/i915: Don't save/restore RS when not used >>>> >>>> was backwards. >>> >>> ? It did exactly what it said, ie. avoid setting the RS save/restore >>> bits on HSW+ while leaving the ext save/restore enabled on older >>> platforms. >>> >> >> Another option is to enable extended state save restore for both HSW and >> the older platforms so we would get both features (RS save/restore and >> Extended State Save enable)? >> >> Seems the reason for this confusion is that the we have the exact same >> bit positions in MI_SET_CONTEXT but it got renamed starting from HSW and up. > > If the bit has been renamed it might be good to add at least a new #define > with a _HSw suffix and better name, just to make it clear what's going on. > gcc will see through this and fold down the different conditions to just > one. I'll do that in the next revision and also add the igt tag you require. In the meantime, please check the igt approach I sent is the correct one. Thanks! -Abdiel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] tests/gem_exec_params: check invalid flags for Resource Streamer
Make sure resource streamer flags works only in correct ring in addition to checking next flag after the RS boundary fails. Cc: Daniel Vetter Signed-off-by: Abdiel Janulgue --- tests/gem_exec_params.c | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 54f0dc3..08ee330 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -48,6 +48,7 @@ #define LOCAL_I915_EXEC_BSD_MASK (3<<13) #define LOCAL_I915_EXEC_BSD_RING1 (1<<13) #define LOCAL_I915_EXEC_BSD_RING2 (2<<13) +#define LOCAL_I915_EXEC_RESOURCE_STREAMER (1<<16) struct drm_i915_gem_execbuffer2 execbuf; struct drm_i915_gem_exec_object2 gem_exec[1]; @@ -220,7 +221,7 @@ igt_main /* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle */ igt_subtest("invalid-flag") { - execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1); + execbuf.flags = I915_EXEC_RENDER | (LOCAL_I915_EXEC_RESOURCE_STREAMER << 1); RUN_FAIL(EINVAL); } @@ -234,6 +235,24 @@ igt_main execbuf.num_cliprects = 0; } + igt_subtest("rs-invalid-on-bsd-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BSD | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-blt-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_BLT | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + + igt_subtest("rs-invalid-on-vebox-ring") { + igt_require(IS_HASWELL(devid) || intel_gen(devid) >= 8); + execbuf.flags = I915_EXEC_VEBOX | LOCAL_I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + #define DIRT(name) \ igt_subtest(#name "-dirt") { \ execbuf.flags = 0; \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW
On 05/18/2015 07:07 PM, Ville Syrjälä wrote: > On Mon, May 18, 2015 at 04:41:51PM +0100, Chris Wilson wrote: >> On Mon, May 18, 2015 at 06:36:18PM +0300, Ville Syrjälä wrote: >>> On Mon, May 18, 2015 at 11:31:56AM +0300, Abdiel Janulgue wrote: >>>> Also clarify comments on context size that the extra state for >>>> Resource Streamer is included. >>>> >>>> Suggested-by: Chris Wilson >>>> Signed-off-by: Abdiel Janulgue >>>> --- >>>> drivers/gpu/drm/i915/i915_gem_context.c | 2 +- >>>> drivers/gpu/drm/i915/i915_reg.h | 3 ++- >>>> 2 files changed, 3 insertions(+), 2 deletions(-) >>>> >>>> diff --git a/drivers/gpu/drm/i915/i915_gem_context.c >>>> b/drivers/gpu/drm/i915/i915_gem_context.c >>>> index f3e84c4..1db107a 100644 >>>> --- a/drivers/gpu/drm/i915/i915_gem_context.c >>>> +++ b/drivers/gpu/drm/i915/i915_gem_context.c >>>> @@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring, >>>>} >>>> >>>>/* These flags are for resource streamer on HSW+ */ >>>> - if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) >>>> + if (IS_HASWELL(ring->dev)) >>>>flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); >>> >>> I don't get it. Previously we told the hardware to save the extended >>> context on !hsw, and now we don't. That doesn't seem correct to me. >> >> We don't use the extended state elsewhere. > > Umm. On SNB at least 3DSTATE_POLY_STIPPLE_PATTERN seems to be part of > the extended state, and on IVB/VLV SOL state is there. Mesa uses all of > that. > >> I'd always been dubious of >> the origins/intentions of this line of code since it claims only to be >> for enabling RS on HSW... >> >> i.e. commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2 >> Author: Ben Widawsky >> Date: Mon Aug 18 10:35:28 2014 -0700 >> >> drm/i915: Don't save/restore RS when not used >> >> was backwards. > > ? It did exactly what it said, ie. avoid setting the RS save/restore > bits on HSW+ while leaving the ext save/restore enabled on older > platforms. > Another option is to enable extended state save restore for both HSW and the older platforms so we would get both features (RS save/restore and Extended State Save enable)? Seems the reason for this confusion is that the we have the exact same bit positions in MI_SET_CONTEXT but it got renamed starting from HSW and up. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH i-g-t] tests/gem_exec_params: check invalid flags for Resource Streamer
Signed-off-by: Abdiel Janulgue --- tests/gem_exec_params.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 54f0dc3..fd9d7bd 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -220,7 +220,7 @@ igt_main /* HANDLE_LUT and NO_RELOC are already exercised by gem_exec_lut_handle */ igt_subtest("invalid-flag") { - execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_HANDLE_LUT << 1); + execbuf.flags = I915_EXEC_RENDER | (I915_EXEC_RESOURCE_STREAMER << 1); RUN_FAIL(EINVAL); } -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
On 05/18/2015 12:01 PM, Daniel Vetter wrote: > On Mon, May 18, 2015 at 11:31:54AM +0300, Abdiel Janulgue wrote: >> Ensures that the batch buffer is executed by the resource streamer >> >> Signed-off-by: Abdiel Janulgue > > Maybe I missed them, but we also need a patch to update gem_exec_params > from igt. At least the invalid-flag subtest should fail with this > applied. Also please add a Testcase: tag once you've added the testcase > for this new flag. I'm not sure what you mean here. When I run unmodified gem_exec_params with this resource streamer patches applied. I get this results, at least on the invalid-flag section: Subtest invalid-flag: SUCCESS (0.000s) Is this supposed to fail? -abdiel > >> --- >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ >> drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + >> include/uapi/drm/i915_drm.h| 7 ++- >> 3 files changed, 22 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index a3190e79..8a0abbb 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void >> *data, >> return -EINVAL; >> } >> >> +if (args->flags & I915_EXEC_RESOURCE_STREAMER) { >> +if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { >> +DRM_DEBUG("RS is only allowed for Haswell, Gen8 " >> + "and above\n"); >> +return -EINVAL; >> +} >> +if (ring->id != RCS) { >> +DRM_DEBUG("RS is not available on %s\n", >> + ring->name); >> +return -EINVAL; >> +} >> + >> +dispatch_flags |= I915_DISPATCH_RS; >> +} >> + >> intel_runtime_pm_get(dev_priv); >> >> ret = i915_mutex_lock_interruptible(dev); >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h >> b/drivers/gpu/drm/i915/intel_ringbuffer.h >> index c761fe0..3521bc0 100644 >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h >> @@ -167,6 +167,7 @@ struct intel_engine_cs { >> unsigned dispatch_flags); >> #define I915_DISPATCH_SECURE 0x1 >> #define I915_DISPATCH_PINNED 0x2 >> +#define I915_DISPATCH_RS 0x4 >> void(*cleanup)(struct intel_engine_cs *ring); >> >> /* GEN8 signal/wait table - never trust comments! >> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h >> index 551b673..a4c1a5c 100644 >> --- a/include/uapi/drm/i915_drm.h >> +++ b/include/uapi/drm/i915_drm.h >> @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 { >> #define I915_EXEC_BSD_RING1 (1<<13) >> #define I915_EXEC_BSD_RING2 (2<<13) >> >> -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) >> +/** Tell the kernel that the batchbuffer is processed by >> + * the resource streamer. >> + */ >> +#define I915_EXEC_RESOURCE_STREAMER (1<<16) >> + >> +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) >> >> #define I915_EXEC_CONTEXT_ID_MASK (0x) >> #define i915_execbuffer2_set_context_id(eb2, context) \ >> -- >> 1.9.1 >> >> ___ >> Intel-gfx mailing list >> Intel-gfx@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 2/3] drm/i915: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW and HSW v2: Add support for Execlists (Minu Mathai ) Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c| 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 -- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..d523494 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); intel_logical_ring_emit(ringbuf, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..9045144 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 3/3] drm/i915: Enable Resource Streamer state save/restore in HSW
Also clarify comments on context size that the extra state for Resource Streamer is included. Suggested-by: Chris Wilson Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/i915_reg.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index f3e84c4..1db107a 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -509,7 +509,7 @@ mi_set_context(struct intel_engine_cs *ring, } /* These flags are for resource streamer on HSW+ */ - if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) + if (IS_HASWELL(ring->dev)) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 238bb25..3db0596 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2498,7 +2498,8 @@ enum skl_disp_power_wells { * valid. Now, docs explain in dwords what is in the context object. The full * size is 70720 bytes, however, the power context and execlist context will * never be saved (power context is stored elsewhere, and execlists don't work - * on HSW) - so the final size is 66944 bytes, which rounds to 17 pages. + * on HSW) - so the final size, including the extra state required for the + * Resource Streamer, is 66944 bytes, which rounds to 17 pages. */ #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) /* Same as Haswell, but 72064 bytes now. */ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v3 1/3] drm/i915: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..8a0abbb 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 " + "and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..a4c1a5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<16) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
Hi, On 05/13/2015 01:22 PM, Chris Wilson wrote: > On Wed, May 13, 2015 at 11:13:24AM +0300, Abdiel Janulgue wrote: >> Adds support for executing the resource streamer on BDW and HSW >> >> v2: Add support for Execlists (Minu Mathai ) >> >> Signed-off-by: Abdiel Janulgue > > I would have liked to have seen the comments for HSW_CTX_TOTAL_SIZE > updated to include the resource streamer requirements. Comment block mention HSW_CTX_TOTAL_SIZE as 70720 bytes which already implicitly include RS dwords. I can add "70720 bytes include resource streamer dwords" to the comments or do you want me to break this down further still? > > Also > > /* These flags are for resource streamer on HSW+ */ > if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8) > flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); > > implies to me that we should be setting something for hsw to > save/restore RS that we do not currently. So either the comment needs > fixing, or we have a lack of code. I checked, the current code does disable context save and restore for RS in HSW (commit e80f14b6d36e3e07111cf2ab084ef8dd5d015ce2) But I've now modified this to: if (IS_HASWELL(ring->dev)) flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN); so that MI_SET_CONTEXT includes RS context on HSW. Anyway, this seems to work perfectly fine in HSW as well. I'll include that in the next version. > > Note that intel_lrc.c has duplicate functions for gen8. :| > -Chris > ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..f937ad0 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1485,6 +1485,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, return -EINVAL; } + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 " + "and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s)\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + intel_runtime_pm_get(dev_priv); ret = i915_mutex_lock_interruptible(dev); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..a4c1a5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<16) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW and HSW v2: Add support for Execlists (Minu Mathai ) Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_lrc.c| 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.c | 6 -- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index fcb074b..d523494 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -1172,7 +1172,8 @@ static int gen8_emit_bb_start(struct intel_ringbuffer *ringbuf, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_logical_ring_emit(ringbuf, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_logical_ring_emit(ringbuf, lower_32_bits(offset)); intel_logical_ring_emit(ringbuf, upper_32_bits(offset)); intel_logical_ring_emit(ringbuf, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..9045144 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer v2
Changes since initial posting: - Don't split the conversion from args->flags to ring from its subsequent EINVAL check (Chris Wilson ) - Execlists support (Minu Mathai ) -- Abdiel Janulgue (2): drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ drivers/gpu/drm/i915/i915_reg.h| 1 + drivers/gpu/drm/i915/intel_lrc.c | 3 ++- drivers/gpu/drm/i915/intel_ringbuffer.c| 6 -- drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 6 files changed, 29 insertions(+), 4 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
On 05/12/2015 12:49 PM, Chris Wilson wrote: > On Mon, May 11, 2015 at 12:01:11PM +0300, Abdiel Janulgue wrote: >> Ensures that the batch buffer is executed by the resource streamer >> >> Signed-off-by: Abdiel Janulgue >> --- >> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ >> drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + >> include/uapi/drm/i915_drm.h| 7 ++- >> 3 files changed, 22 insertions(+), 1 deletion(-) >> >> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> index a3190e79..afbd3c16 100644 >> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c >> @@ -1474,6 +1474,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void >> *data, >> } else >> ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; >> >> +if (args->flags & I915_EXEC_RESOURCE_STREAMER) { >> +if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { >> +DRM_DEBUG("RS is only allowed for Haswell, Gen8 " >> + "and above\n"); >> +return -EINVAL; >> +} >> +if (ring->id != RCS) { >> +DRM_DEBUG("RS is not available on %s)\n", >> + ring->name); >> +return -EINVAL; >> +} >> + >> +dispatch_flags |= I915_DISPATCH_RS; >> +} >> + >> if (!intel_ring_initialized(ring)) { > > Please don't split the conversion from args->flags to ring from its > subsequent EINVAL check. > -Chris > I'll include this change in the next version. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer
On 05/11/2015 12:26 PM, Chris Wilson wrote: > On Mon, May 11, 2015 at 12:01:10PM +0300, Abdiel Janulgue wrote: >> This is a re-spin of my resource streamer patchset from a year ago. >> The resource streamer is a hw-feature that helps in reducing commands >> submitted by the CPU. > > Did you check that the contexts we allocate are large enough to hold the > extra RS state? I can't remember if the size we settled on was > sufficient or not... > -Chris > I checked HSW_CXT_TOTAL_SIZE and GEN8_CXT_TOTAL_SIZE to the values in Bspec, the sizes does indeed contain the resource streamer dwords. So I guess the answer is it's sufficient. -Abdiel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 15 +++ drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index a3190e79..afbd3c16 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1474,6 +1474,21 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } else ring = &dev_priv->ring[(args->flags & I915_EXEC_RING_MASK) - 1]; + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if (!IS_HASWELL(dev) && INTEL_INFO(dev)->gen < 8) { + DRM_DEBUG("RS is only allowed for Haswell, Gen8 " + "and above\n"); + return -EINVAL; + } + if (ring->id != RCS) { + DRM_DEBUG("RS is not available on %s)\n", +ring->name); + return -EINVAL; + } + + dispatch_flags |= I915_DISPATCH_RS; + } + if (!intel_ring_initialized(ring)) { DRM_DEBUG("execbuf with invalid ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c761fe0..3521bc0 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -167,6 +167,7 @@ struct intel_engine_cs { unsigned dispatch_flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_engine_cs *ring); /* GEN8 signal/wait table - never trust comments! diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 551b673..a4c1a5c 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -760,7 +760,12 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD_RING1(1<<13) #define I915_EXEC_BSD_RING2(2<<13) -#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<16) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] drm/i915/hsw/bdw: Enable resource streamer
This is a re-spin of my resource streamer patchset from a year ago. The resource streamer is a hw-feature that helps in reducing commands submitted by the CPU. We have finally have the Mesa optimization that requires the use of this interface. Abdiel Janulgue (2): drm/i915/hsw/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START drivers/gpu/drm/i915/i915_gem_execbuffer.c | 14 ++ drivers/gpu/drm/i915/i915_reg.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 6 -- drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 5 files changed, 26 insertions(+), 3 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/hsw/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index b522eb6..238bb25 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -356,6 +356,7 @@ #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_PREDICATE_SRC0 (0x2400) #define MI_PREDICATE_SRC1 (0x2408) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 441e250..9045144 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -2385,7 +2385,8 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); @@ -2408,7 +2409,8 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | (dispatch_flags & I915_DISPATCH_SECURE ? -0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | + (dispatch_flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] drm/i915/bdw: Enable resource streamer on Broadwell
On 11.06.2014 11:10, Jesse Barnes wrote: On Tue, 6 May 2014 22:25:04 +0300 Abdiel Janulgue wrote: From: Abdiel Janulgue This is a re-spin of my resource streamer patchset from October adapted to enable the feature on Broadwell instead. The resource streamer is a hw-feature that helps in reducing commands being submitted by the CPU. Haswell initially has this feature. Unfortunately, HSW seems to have problems switching between non-RS and RS-enabled commands[1]. On BDW however it works seamlessly as expected. The i-g-t test comes next on a separate patch-series. -- [1] http://lists.freedesktop.org/archives/intel-gfx/2014-May/044577.html Abdiel Janulgue (2): drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c|3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + include/uapi/drm/i915_drm.h|7 ++- 5 files changed, 18 insertions(+), 2 deletions(-) These seem trivial enough... have you seen cases where you'd like to enable it in Mesa? If so, it probably makes sense to merge this patch so you can do your tuning and enabling on the Mesa side... I've had some Mesa patches since last year. Unfortunately, the changes didn't give any dramatic performance improvements. On the other hand, I admit: 1. I've only run it against GLBenchmark and Unigine benchmarks. Discussion with Portland folks seem to suggest that we need to have a comprehensive run with more benchmarks. 2. I only did the benchmarks in Haswell. Would be nice to see how it does on Broadwell. I agree though that further tuning of the performance needs to be done on the Mesa side. ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
On Wednesday, May 07, 2014 02:49:31 PM Ville Syrjälä wrote: > I quickly cobbled together a hsw version of this and gave it a whirl on > one machine. Seems to work just fine here, and no lockups when switching > between hw and sw binding tables. Did you get the lockups on hsw even > with rendercopy? > > Here's my hsw version: > > > +static void > +gen7_hw_binding_table(struct intel_batchbuffer *batch, bool enable) > +{ > + if (!enable) { > + OUT_BATCH(MI_RS_CONTROL | 0x0); > + > + OUT_BATCH(HSW_3DSTATE_BINDING_TABLE_POOL_ALLOC | (3 - 2)); > + /* binding table pool base address */ This bit I missed and the source of my troubles for the past few months. > + OUT_BATCH(3 << 5); Yep, I confirm toggling on HSW does work quite well now. I'll now update the patches to include HSW path on the kernel. I also take back my previous statement that RS is broken on HSW! :) Thanks a lot Ville! > + /* Upper bound */ > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); > + OUT_BATCH(GEN7_PIPE_CONTROL_CS_STALL | > GEN7_PIPE_CONTROL_STALL_AT_SCOREBOARD); + OUT_BATCH(0); > + OUT_BATCH(0); > + > + OUT_BATCH(GEN7_PIPE_CONTROL | (4 - 2)); > + OUT_BATCH(GEN7_PIPE_CONTROL_SC_INVALIDATE); > + OUT_BATCH(0); > + OUT_BATCH(0); > + > + return; > +} ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
On Wednesday, May 07, 2014 12:38:04 AM Ville Syrjälä wrote: > On Tue, May 06, 2014 at 10:48:02PM +0300, Abdiel Janulgue wrote: > > Use on-chip hw-binding table generator to generate binding > > tables when the test emits SURFACE_STATES packets. The hw generates > > these binding table packets internally so we don't have to > > allocate space on the batchbuffer. > > > > Signed-off-by: Abdiel Janulgue > > --- > > > > lib/gen8_render.h | 13 +++ > > lib/intel_batchbuffer.c | 12 ++ > > lib/intel_batchbuffer.h |3 ++ > > lib/intel_reg.h |3 ++ > > lib/rendercopy_gen8.c | 97 > > --- 5 files changed, 123 > > insertions(+), 5 deletions(-) > > > > diff --git a/lib/gen8_render.h b/lib/gen8_render.h > > index fffc100..4d5f7ba 100644 > > --- a/lib/gen8_render.h > > +++ b/lib/gen8_render.h > > @@ -83,6 +83,19 @@ > > > > /* Random shifts */ > > #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23 > > > > +/* GEN8+ resource streamer */ > > +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* > > GEN8 */ +# define BINDING_TABLE_POOL_ENABLE 0x0800 > > +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* > > GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << > > 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS > > (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS > > (0x7846 << 16) /* GEN8 */ +#define > > GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */ + > > +/* Toggling RS needs PIPE_CONTROL SC invalidate */ > > +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 > > << 24)) > > +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2) > > + > > > > /* Shamelessly ripped from mesa */ > > struct gen8_surface_state > > { > > > > diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c > > index e868922..26c9b89 100644 > > --- a/lib/intel_batchbuffer.c > > +++ b/lib/intel_batchbuffer.c > > @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer > > *batch) > > > > memset(batch->buffer, 0, sizeof(batch->buffer)); > > > > batch->ptr = batch->buffer; > > > > + > > + if (batch->hw_bt_pool_bo != NULL) { > > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > > + batch->hw_bt_pool_bo = NULL; > > + } > > + > > + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt", > > + 131072, 4096); > > > > } > > > > /** > > > > @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer > > *batch)> > > { > > > > drm_intel_bo_unreference(batch->bo); > > batch->bo = NULL; > > > > + > > + drm_intel_bo_unreference(batch->hw_bt_pool_bo); > > + batch->hw_bt_pool_bo = NULL; > > + > > > > free(batch); > > > > } > > > > diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h > > index 49dbcf0..2a516bc 100644 > > --- a/lib/intel_batchbuffer.h > > +++ b/lib/intel_batchbuffer.h > > @@ -18,6 +18,9 @@ struct intel_batchbuffer { > > > > uint8_t buffer[BATCH_SZ]; > > uint8_t *ptr; > > uint8_t *state; > > > > + > > + drm_intel_bo *hw_bt_pool_bo; > > + int use_resource_streamer; > > > > }; > > > > struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr > > *bufmgr, > > > > diff --git a/lib/intel_reg.h b/lib/intel_reg.h > > index 4b3a102..1cb9a29 100644 > > --- a/lib/intel_reg.h > > +++ b/lib/intel_reg.h > > @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE > > SOFTWARE.> > > #define MI_BATCH_NON_SECURE(1) > > #define MI_BATCH_NON_SECURE_I965 (1 << 8) > > > > +/* Resource Streamer */ > > +#define MI_RS_CONTROL (0x6 << 23) > > + > > > > #define MAX_DISPLAY_PIPES 2 > > > > typedef enum { > > > > diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c > > index 6f5a698..b0dd0f3 100644 > > --- a/lib/rendercopy_gen8.c > > +++ b/lib/rendercopy_gen8.c > > @@ -169,12 +169,14 @@ static void &
[Intel-gfx] [PATCH] igt tests/gem_exec_params: Test for -EINVAL for invalid platform and ring when executing RS
Add test that makes sure RS bit only gets executed on BDW and on the render ring. Signed-off-by: Abdiel Janulgue --- tests/gem_exec_params.c | 11 +++ 1 file changed, 11 insertions(+) diff --git a/tests/gem_exec_params.c b/tests/gem_exec_params.c index 769969d..a3f765b 100644 --- a/tests/gem_exec_params.c +++ b/tests/gem_exec_params.c @@ -193,6 +193,17 @@ igt_main execbuf.num_cliprects = 0; } + igt_subtest("rs-not-gen8") { +igt_require(intel_gen(devid) != 8); +execbuf.flags = I915_EXEC_RENDER | I915_EXEC_RESOURCE_STREAMER; +RUN_FAIL(EINVAL); +} + + igt_subtest("invalid-rs-ring") { + execbuf.flags = I915_EXEC_RESOURCE_STREAMER; + RUN_FAIL(EINVAL); + } + #define DIRT(name) \ igt_subtest(#name "-dirt") { \ execbuf.flags = 0; \ -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] rendercopy/bdw: Enable hw-generated binding tables
Use on-chip hw-binding table generator to generate binding tables when the test emits SURFACE_STATES packets. The hw generates these binding table packets internally so we don't have to allocate space on the batchbuffer. Signed-off-by: Abdiel Janulgue --- lib/gen8_render.h | 13 +++ lib/intel_batchbuffer.c | 12 ++ lib/intel_batchbuffer.h |3 ++ lib/intel_reg.h |3 ++ lib/rendercopy_gen8.c | 97 --- 5 files changed, 123 insertions(+), 5 deletions(-) diff --git a/lib/gen8_render.h b/lib/gen8_render.h index fffc100..4d5f7ba 100644 --- a/lib/gen8_render.h +++ b/lib/gen8_render.h @@ -83,6 +83,19 @@ /* Random shifts */ #define GEN8_3DSTATE_PS_MAX_THREADS_SHIFT 23 +/* GEN8+ resource streamer */ +#define GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC (0x7919 << 16) /* GEN8 */ +# define BINDING_TABLE_POOL_ENABLE 0x0800 +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_VS (0x7843 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_GS (0x7844 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_HS (0x7845 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_DS (0x7846 << 16) /* GEN8 */ +#define GEN8_3DSTATE_BINDING_TABLE_EDIT_PS (0x7847 << 16) /* GEN8 */ + +/* Toggling RS needs PIPE_CONTROL SC invalidate */ +#define _3DSTATE_PIPE_CONTROL ((0x3 << 29) | (3 << 27) | (2 << 24)) +#define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1 << 2) + /* Shamelessly ripped from mesa */ struct gen8_surface_state { diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c index e868922..26c9b89 100644 --- a/lib/intel_batchbuffer.c +++ b/lib/intel_batchbuffer.c @@ -81,6 +81,14 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch) memset(batch->buffer, 0, sizeof(batch->buffer)); batch->ptr = batch->buffer; + + if (batch->hw_bt_pool_bo != NULL) { + drm_intel_bo_unreference(batch->hw_bt_pool_bo); + batch->hw_bt_pool_bo = NULL; + } + + batch->hw_bt_pool_bo = drm_intel_bo_alloc(batch->bufmgr, "hw_bt", + 131072, 4096); } /** @@ -116,6 +124,10 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch) { drm_intel_bo_unreference(batch->bo); batch->bo = NULL; + + drm_intel_bo_unreference(batch->hw_bt_pool_bo); + batch->hw_bt_pool_bo = NULL; + free(batch); } diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h index 49dbcf0..2a516bc 100644 --- a/lib/intel_batchbuffer.h +++ b/lib/intel_batchbuffer.h @@ -18,6 +18,9 @@ struct intel_batchbuffer { uint8_t buffer[BATCH_SZ]; uint8_t *ptr; uint8_t *state; + + drm_intel_bo *hw_bt_pool_bo; + int use_resource_streamer; }; struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, diff --git a/lib/intel_reg.h b/lib/intel_reg.h index 4b3a102..1cb9a29 100644 --- a/lib/intel_reg.h +++ b/lib/intel_reg.h @@ -2560,6 +2560,9 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. #define MI_BATCH_NON_SECURE(1) #define MI_BATCH_NON_SECURE_I965 (1 << 8) +/* Resource Streamer */ +#define MI_RS_CONTROL (0x6 << 23) + #define MAX_DISPLAY_PIPES 2 typedef enum { diff --git a/lib/rendercopy_gen8.c b/lib/rendercopy_gen8.c index 6f5a698..b0dd0f3 100644 --- a/lib/rendercopy_gen8.c +++ b/lib/rendercopy_gen8.c @@ -169,12 +169,14 @@ static void gen6_render_flush(struct intel_batchbuffer *batch, drm_intel_context *context, uint32_t batch_end) { - int ret; + int ret = 0; + uint32_t flags = 0; ret = drm_intel_bo_subdata(batch->bo, 0, 4096, batch->buffer); + flags = batch->use_resource_streamer ? I915_EXEC_RESOURCE_STREAMER : 0; if (ret == 0) ret = drm_intel_gem_bo_context_exec(batch->bo, context, - batch_end, 0); + batch_end, flags | I915_EXEC_RENDER); assert(ret == 0); } @@ -228,18 +230,83 @@ gen8_bind_buf(struct intel_batchbuffer *batch, struct igt_buf *buf, return offset; } +static void +gen8_hw_binding_table(struct intel_batchbuffer *batch, bool enable) +{ + if (!enable) { + OUT_BATCH(MI_RS_CONTROL | 0x0); + + OUT_BATCH(GEN8_3DSTATE_BINDING_TABLE_POOL_ALLOC | (4 - 2)); + /* binding table pool base address */ + OUT_BATCH(0); + OUT_BATCH(0); + /* Upper bound */ + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2)); + OUT_BATCH(PIPE_CONTROL_STATE_CACHE_INVALIDATE); + OUT_BATCH(0); +
[Intel-gfx] [PATCH 2/2] tests/gem_render_copy: Add option to test resource streamer
Add option in basic test for the render_copy to test and toggle hw-generated binding tables feature. Signed-off-by: Abdiel Janulgue --- tests/gem_render_copy.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/gem_render_copy.c b/tests/gem_render_copy.c index 331b7ce..c9dfba0 100644 --- a/tests/gem_render_copy.c +++ b/tests/gem_render_copy.c @@ -131,14 +131,19 @@ int main(int argc, char **argv) int opt; int opt_dump_png = false; int opt_dump_aub = igt_aub_dump_enabled(); + int opt_use_rs = false; igt_simple_init(); - while ((opt = getopt(argc, argv, "d")) != -1) { + while ((opt = getopt(argc, argv, "d:r")) != -1) { switch (opt) { case 'd': opt_dump_png = true; break; + case 'r': + printf("Use resource streamer\n"); + opt_use_rs = true; + break; default: break; } @@ -156,6 +161,7 @@ int main(int argc, char **argv) "no render-copy function\n"); batch = intel_batchbuffer_alloc(data.bufmgr, data.devid); + batch->use_resource_streamer = opt_use_rs; igt_assert(batch); } -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] igt tests/rendercopy: Add option to test resource streamer functionality
This patchseries tests a resource streamer feature called hw-generated binding tables, which can be enabled from userspace. It is essentially a newer format that uses the hw to generate and submit binding tables to the GPU. It uses the basic rendercopy test to toggle the feature on and off and to verify if the resulting data is still correct. Abdiel Janulgue (2): rendercopy/bdw: Enable hw-generated binding tables tests/gem_render_copy: Add option to test resource streamer lib/gen8_render.h | 13 +++ lib/intel_batchbuffer.c | 12 ++ lib/intel_batchbuffer.h |3 ++ lib/intel_reg.h |3 ++ lib/rendercopy_gen8.c | 97 --- tests/gem_render_copy.c |8 +++- 6 files changed, 130 insertions(+), 6 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] drm/i915/bdw: Enable resource streamer on Broadwell
From: Abdiel Janulgue This is a re-spin of my resource streamer patchset from October adapted to enable the feature on Broadwell instead. The resource streamer is a hw-feature that helps in reducing commands being submitted by the CPU. Haswell initially has this feature. Unfortunately, HSW seems to have problems switching between non-RS and RS-enabled commands[1]. On BDW however it works seamlessly as expected. The i-g-t test comes next on a separate patch-series. -- [1] http://lists.freedesktop.org/archives/intel-gfx/2014-May/044577.html Abdiel Janulgue (2): drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 drivers/gpu/drm/i915/i915_reg.h|1 + drivers/gpu/drm/i915/intel_ringbuffer.c|3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h|1 + include/uapi/drm/i915_drm.h|7 ++- 5 files changed, 18 insertions(+), 2 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/bdw: Expose I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |8 include/uapi/drm/i915_drm.h|7 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 47fe8ec..68aaa9b 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1074,6 +1074,14 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if ((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_RENDER || + !IS_BROADWELL(dev)) + return -EINVAL; + + flags |= I915_DISPATCH_RS; + } + if ((args->flags & I915_EXEC_RING_MASK) > LAST_USER_RING) { DRM_DEBUG("execbuf with unknown ring: %d\n", (int)(args->flags & I915_EXEC_RING_MASK)); diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 8a3e4ef00..adfc45d 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -734,7 +734,12 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/bdw: Enable resource streamer bits on MI_BATCH_BUFFER_START
Adds support for executing the resource streamer on BDW Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c |3 ++- drivers/gpu/drm/i915/intel_ringbuffer.h |1 + 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 0eff337..2d5d0ab 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -286,6 +286,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_BATCH_BUFFER_START_GEN8 MI_INSTR(0x31, 1) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 40a7aa4..cc0110c 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1810,7 +1810,8 @@ gen8_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, return ret; /* FIXME(BDW): Address space and security selectors. */ - intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); + intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | + (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); intel_ring_emit(ring, lower_32_bits(offset)); intel_ring_emit(ring, upper_32_bits(offset)); intel_ring_emit(ring, MI_NOOP); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 72c3c15..cf9e259 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -116,6 +116,7 @@ struct intel_ring_buffer { unsigned flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_ring_buffer *ring); struct { -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
Hi all! On Thursday, April 24, 2014 01:17:14 PM Ville Syrjälä wrote: > On Thu, Apr 24, 2014 at 11:25:15AM +0300, Abdiel Janulgue wrote: > > On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote: > > > On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote: > > > > Anyway I haven't tried the work-around where we explictly only disable > > > > the > > > > BT and RS on the other user-space clients (xorg driver in this case) > > > > when > > > > Mesa is using RS instead of forcing the reset of the clients to use RS > > > > format. I'll try that first and let you know if it works. > > > > I hate to break the bad news. Tried this just now - still get hangs :( > > > > So I guess, all userspace clients* does need to use RS-format if we use > > this feature. GPGPU workloads seems to be special use-case where the RS > > hwbinding table format can be disabled. Otherwise, I guess we are stuck > > with this inflexibility. > > The spec also says this: > "When switching between HW and SW binding table generation, > SW must issue a state cache invalidate." > > So it does look like they were expecting people to switch between the > two modes. > > Do you have any igt test for RS? Maybe add an option into rendercopy to > make use of RS? Then we could write some tests that try to hit this > problem w/o requiring Mesa. I've modified igt rendercopy to test switching RS format on and off. Good news and bad news! Good news is that switching between RS binding tables and SW format works seamlessly in BDW! Bad news for HSW because the problems I describe above only happens on that chip. This is a HW issue then. =Abdiel > > > [1] > > On the other hand, it doesn't seem all that bad though. The RS hw-binding > > table format are only needed for clients that submit vertex and pixel > > shader commands. I've identified currently just UXA and SNA that seem to > > use this besides Mesa. OpenCL is not affected. > > > > > > If it does, it > > > > might be more efficient to do that in the kernel? > > > > > > It has to be done in the kernel in order for interoperability with third > > > party clients. > > > -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
On Thursday, April 24, 2014 01:17:14 PM Ville Syrjälä wrote: > On Thu, Apr 24, 2014 at 11:25:15AM +0300, Abdiel Janulgue wrote: > > On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote: > > > On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote: > > > > Anyway I haven't tried the work-around where we explictly only disable > > > > the > > > > BT and RS on the other user-space clients (xorg driver in this case) > > > > when > > > > Mesa is using RS instead of forcing the reset of the clients to use RS > > > > format. I'll try that first and let you know if it works. > > > > I hate to break the bad news. Tried this just now - still get hangs :( > > > > So I guess, all userspace clients* does need to use RS-format if we use > > this feature. GPGPU workloads seems to be special use-case where the RS > > hwbinding table format can be disabled. Otherwise, I guess we are stuck > > with this inflexibility. > > The spec also says this: > "When switching between HW and SW binding table generation, > SW must issue a state cache invalidate." > > So it does look like they were expecting people to switch between the > two modes. Yep, the previous work-around did include a state cache invalidate. > > Do you have any igt test for RS? Maybe add an option into rendercopy to > make use of RS? Then we could write some tests that try to hit this > problem w/o requiring Mesa. Seems to be a good idea. I'll try to reproduce the problem with igt > > > [1] > > On the other hand, it doesn't seem all that bad though. The RS hw-binding > > table format are only needed for clients that submit vertex and pixel > > shader commands. I've identified currently just UXA and SNA that seem to > > use this besides Mesa. OpenCL is not affected. > > > > > > If it does, it > > > > might be more efficient to do that in the kernel? > > > > > > It has to be done in the kernel in order for interoperability with third > > > party clients. > > > -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
On Thursday, April 24, 2014 07:06:34 AM Chris Wilson wrote: > On Thu, Apr 24, 2014 at 09:08:14AM +0300, Abdiel Janulgue wrote: > > Anyway I haven't tried the work-around where we explictly only disable the > > BT and RS on the other user-space clients (xorg driver in this case) when > > Mesa is using RS instead of forcing the reset of the clients to use RS > > format. I'll try that first and let you know if it works. I hate to break the bad news. Tried this just now - still get hangs :( So I guess, all userspace clients* does need to use RS-format if we use this feature. GPGPU workloads seems to be special use-case where the RS hwbinding table format can be disabled. Otherwise, I guess we are stuck with this inflexibility. [1] On the other hand, it doesn't seem all that bad though. The RS hw-binding table format are only needed for clients that submit vertex and pixel shader commands. I've identified currently just UXA and SNA that seem to use this besides Mesa. OpenCL is not affected. > > If it does, it > > might be more efficient to do that in the kernel? > > It has to be done in the kernel in order for interoperability with third > party clients. > -Chris ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
On Wednesday, April 23, 2014 08:50:21 PM Ville Syrjälä wrote: > On Wed, Apr 23, 2014 at 06:52:09PM +0200, Daniel Vetter wrote: > > On Wed, Apr 23, 2014 at 1:21 PM, Abdiel Janulgue > > > > wrote: > > > I've already tried disabling RS at the end of every batch so that next > > > batch in different context can continue to use older non-RS format. > > > That does not work either and still causes hangs. > > > > > > What I've seen so far, it seems GPU does not like switching the format > > > of > > > commands from RS-format to non-RS format. It's either one way or the > > > other. If switched on, it affects subsequent contexes henceforth > > > expecting RS-format commands until the GPU gets reset. That's probably > > > the note in bspec: > > > > > > "the binding table generator feature has a simple all or nothing model". > > > > Oh hooray, that's just awesome :( So we indeed need to stop the gpu > > and reset it if there's a non-RS render batch after any RS render > > batch. > > I'm not sure I buy it. There's an example in the spec showing how to > disable the RS around eg. GPGPU workloads and re-enable it for 3D > workloads even within one batch. I guess it's possible that the GPGPU > pipeline mode is somehow special here, but since the RS state is > supposed to be saved to the context I'm thinking you should be able to > disable it whenever you want. > > What's really confusing with that example is the fact that it first > re-enables the binding tables and then the RS, but then there's a note > after that saying you have to those steps in the opposite order. > > Anyhoo, I'm wondering if the problems are simply due to disabling RS but > leaving the binding tables enabled? Yes, the work-around I tried previously is that at the end of each batch both hw-binding tables and RS are disabled in that particular order. > > So one idea might be that when we switch from executing an RS batch to a > non-RS batch, we should disable the RS and binding tables manually. We > would then have to demand that any user batch which needs the binding > tables enabled must enable them, even if if we just executed a batch that > already used the binding tables. And when we need to disable the RS and > binding tables we could either have the kernel do that autmagically when > it notices a RS->non-RS transition, or we could also demand that all user > batches must disable the binding tables at the end. But maybe demanding > that from every batch would incur some extra overhead? In any case it > should be fairly easy to try that and see if it cures the hangs. Or are > you already doing things that way? This is actually what current RS implementation in Mesa does. It explicitly enables RS then hw-binding tables for each batch start and then the tear-down work-around at the batch end I mentioned above; which didn't help at all. Anyway I haven't tried the work-around where we explictly only disable the BT and RS on the other user-space clients (xorg driver in this case) when Mesa is using RS instead of forcing the reset of the clients to use RS format. I'll try that first and let you know if it works. If it does, it might be more efficient to do that in the kernel? --Abdiel ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
On Tuesday, April 22, 2014 07:20:58 PM Daniel Vetter wrote: > On Tue, Apr 22, 2014 at 04:23:12PM +0100, Chris Wilson wrote: > > On Tue, Apr 22, 2014 at 06:16:34PM +0300, Abdiel Janulgue wrote: > > > This patch series enables resource streamer for xf86-video-intel UXA. > > > > > > Fixes i965 Mesa driver that makes use of resource streamer optimization > > > to survive a full piglit run without bricking the machine. Previously, > > > I get random hungs when doing a full piglit round when enabling RS. > > > After months of head-scratching, I noticed I didn't quite pay attention > > > > > > to this small detail in bspec: > > > "The binding table generator feature has a simple all or nothing > > > > > > model. If HW generated binding tables are enabled, the driver must > > > enable > > > the pool and use 3D_HW_BINDING_TABLE_POINTER_* commands." > > > > > > I realized that our xf86-video-intel driver is piping out 3D commands > > > as well that used the older manual-generated binding table format > > > that caused a conflict when Mesa is using the hw-generated binding table > > > format. > > > > This has to be per-context right? Otherwise how do you intend to > > coordinate multiple clients submitting to the kernel using incompatible > > command sets? Even in the restricted X/DRI sense, how do you intend to > > negotiate which to use? > > Hm, I've thought that the MI_BB_START should synchronize with the > asynchronous RS parsing/processing? Is there no way to disable RS again > for the next batch in a different context? I've already tried disabling RS at the end of every batch so that next batch in different context can continue to use older non-RS format. That does not work either and still causes hangs. What I've seen so far, it seems GPU does not like switching the format of commands from RS-format to non-RS format. It's either one way or the other. If switched on, it affects subsequent contexes henceforth expecting RS-format commands until the GPU gets reset. That's probably the note in bspec: "the binding table generator feature has a simple all or nothing model". > > If there's no way to solve this with contexts or some manual reset trick > using LRIs then we're pretty decently screwed up. Worst case we need to > stop the gpu and reset it to keep backwards compat :( ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC] xf86-video-intel: enable hw-generated binding tables
This patch series enables resource streamer for xf86-video-intel UXA. Fixes i965 Mesa driver that makes use of resource streamer optimization to survive a full piglit run without bricking the machine. Previously, I get random hungs when doing a full piglit round when enabling RS. After months of head-scratching, I noticed I didn't quite pay attention to this small detail in bspec: "The binding table generator feature has a simple all or nothing model. If HW generated binding tables are enabled, the driver must enable the pool and use 3D_HW_BINDING_TABLE_POINTER_* commands." I realized that our xf86-video-intel driver is piping out 3D commands as well that used the older manual-generated binding table format that caused a conflict when Mesa is using the hw-generated binding table format. I didn't touch the SNA and video acceleration paths yet. Please let me know if I'm on the right track. To use the feature, set in xorg.conf: Option "ResourceStreamer" "true" src/intel_options.c |1 + src/intel_options.h |1 + src/uxa/i965_3d.c |5 ++- src/uxa/i965_reg.h |8 + src/uxa/i965_render.c | 78 +++ src/uxa/intel.h |3 ++ src/uxa/intel_batchbuffer.c |7 ++-- src/uxa/intel_driver.c | 10 +- 8 files changed, 94 insertions(+), 19 deletions(-) Abdiel Janulgue (2): [PATCH 1/2] xf86-video-intel: Add "ResourceStreamer" option [PATCH 2/2] xf86-video-intel: Enable hw-generated binding tables for ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC PATCH 1/2] xf86-video-intel: Add "ResourceStreamer" option
Signed-off-by: Abdiel Janulgue --- src/intel_options.c|1 + src/intel_options.h|1 + src/uxa/intel.h|3 +++ src/uxa/intel_driver.c | 10 +- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/intel_options.c b/src/intel_options.c index 02a4ae1..cc418d1 100644 --- a/src/intel_options.c +++ b/src/intel_options.c @@ -35,6 +35,7 @@ const OptionInfoRec intel_options[] = { {OPTION_DEBUG_FLUSH_CACHES, "DebugFlushCaches", OPTV_BOOLEAN, {0}, 0}, {OPTION_DEBUG_WAIT, "DebugWait", OPTV_BOOLEAN, {0}, 0}, {OPTION_BUFFER_CACHE, "BufferCache", OPTV_BOOLEAN, {0},1}, + {OPTION_RESOURCESTREAMER, "ResourceStreamer", OPTV_BOOLEAN, {0}, 0}, #endif {-1,NULL, OPTV_NONE, {0},0} }; diff --git a/src/intel_options.h b/src/intel_options.h index 77f0c45..4867419 100644 --- a/src/intel_options.h +++ b/src/intel_options.h @@ -43,6 +43,7 @@ enum intel_options { OPTION_DEBUG_FLUSH_CACHES, OPTION_DEBUG_WAIT, OPTION_BUFFER_CACHE, + OPTION_RESOURCESTREAMER, #endif NUM_OPTIONS, }; diff --git a/src/uxa/intel.h b/src/uxa/intel.h index 6ac770e..611eeea 100644 --- a/src/uxa/intel.h +++ b/src/uxa/intel.h @@ -349,6 +349,9 @@ typedef struct intel_screen_private { */ Bool fallback_debug; unsigned debug_flush; + + dri_bo *hw_bt_pool_bo; + Bool use_resource_streamer; #if HAVE_UDEV struct udev_monitor *uevent_monitor; pointer uevent_handler; diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c index 0a4fe2a..5574ffe 100644 --- a/src/uxa/intel_driver.c +++ b/src/uxa/intel_driver.c @@ -196,6 +196,15 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) intel->fallback_debug = xf86ReturnOptValBool(intel->Options, OPTION_FALLBACKDEBUG, FALSE); + intel->use_resource_streamer = xf86ReturnOptValBool(intel->Options, +OPTION_RESOURCESTREAMER, +FALSE); + if (intel->use_resource_streamer) + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, + "Enabling Resource Streamer\n"); + else + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, + "Disabling Resource Streamer\n"); intel->debug_flush = 0; @@ -870,7 +879,6 @@ I830ScreenInit(SCREEN_INIT_ARGS_DECL) * appropriate. */ intel->XvEnabled = TRUE; - #ifdef DRI2 if (intel->directRenderingType == DRI_NONE && I830DRI2ScreenInit(screen)) -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [RFC PATCH 2/2] xf86-video-intel: Enable hw-generated binding tables for UXA
Code is based on my hw-generated binding table code for Mesa adapted to i965_composite path in UXA. Signed-off-by: Abdiel Janulgue --- src/uxa/i965_3d.c |5 ++- src/uxa/i965_reg.h |8 + src/uxa/i965_render.c | 78 +++ src/uxa/intel_batchbuffer.c |7 ++-- 4 files changed, 80 insertions(+), 18 deletions(-) diff --git a/src/uxa/i965_3d.c b/src/uxa/i965_3d.c index 757a979..afbb5a7 100644 --- a/src/uxa/i965_3d.c +++ b/src/uxa/i965_3d.c @@ -406,7 +406,10 @@ gen7_upload_binding_table(intel_screen_private *intel, uint32_t ps_binding_table_offset) { OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2)); - OUT_BATCH(ps_binding_table_offset); + if (intel->use_resource_streamer) + OUT_BATCH(ps_binding_table_offset >> 1); + else + OUT_BATCH(ps_binding_table_offset); } void diff --git a/src/uxa/i965_reg.h b/src/uxa/i965_reg.h index a934a67..157b212 100644 --- a/src/uxa/i965_reg.h +++ b/src/uxa/i965_reg.h @@ -296,6 +296,14 @@ /* DW1 */ # define GEN7_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT 16 +/* GEN7+ resource streamer */ +#define GEN7_3DSTATE_BINDING_TABLE_POOL_ALLOC BRW_3D(3, 1, 0x19) +# define BINDING_TABLE_POOL_ENABLE 0x0860 +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_VS BRW_3D(3, 0, 0x43) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_GS BRW_3D(3, 0, 0x44) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_HS BRW_3D(3, 0, 0x45) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_DS BRW_3D(3, 0, 0x46) +#define GEN7_3DSTATE_BINDING_TABLE_EDIT_PS BRW_3D(3, 0, 0x47) #define PIPELINE_SELECT_3D 0 #define PIPELINE_SELECT_MEDIA 1 diff --git a/src/uxa/i965_render.c b/src/uxa/i965_render.c index 74f57af..d5225dd 100644 --- a/src/uxa/i965_render.c +++ b/src/uxa/i965_render.c @@ -1783,6 +1783,10 @@ static void i965_surface_flush(struct intel_screen_private *intel) sizeof(intel->surface_data), 4096); assert(intel->surface_bo); + drm_intel_bo_unreference(intel->hw_bt_pool_bo); + intel->hw_bt_pool_bo = drm_intel_bo_alloc(intel->bufmgr, "hw_bt", + 131072, 4096); + assert(intel->hw_bt_pool_bo); return; (void)ret; } @@ -2217,32 +2221,70 @@ static void i965_select_vertex_buffer(struct intel_screen_private *intel) static void i965_bind_surfaces(struct intel_screen_private *intel) { uint32_t *binding_table; + uint32_t surf0 = 0, surf1 = 0, surf2 = 0; assert(intel->surface_used + 4 * SURFACE_STATE_PADDED_SIZE <= sizeof(intel->surface_data)); - binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); - intel->surface_table = intel->surface_used; - intel->surface_used += SURFACE_STATE_PADDED_SIZE; - - binding_table[0] = - i965_set_picture_surface_state(intel, + surf0 = i965_set_picture_surface_state(intel, intel->render_dest_picture, intel->render_dest, TRUE); - binding_table[1] = - i965_set_picture_surface_state(intel, + surf1 = i965_set_picture_surface_state(intel, intel->render_source_picture, intel->render_source, FALSE); if (intel->render_mask) { - binding_table[2] = - i965_set_picture_surface_state(intel, - intel->render_mask_picture, - intel->render_mask, - FALSE); + surf2 = i965_set_picture_surface_state(intel, + intel->render_mask_picture, + intel->render_mask, + FALSE); + } + + if (intel->use_resource_streamer) { + intel->surface_table += (256 * sizeof(uint16_t)); + OUT_BATCH(GEN7_3DSTATE_BINDING_TABLE_EDIT_PS | (5 - 2)); + OUT_BATCH(0x3); + { + OUT_BATCH(0 << 16 | surf0 >> 5); + OUT_BATCH(1 << 16 | surf1 >> 5); + OUT_BATCH(2 << 16 | surf2 >> 5); + } + } else { + binding_table = (uint32_t*) (intel->surface_data + intel->surface_used); + intel->surfac
[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START (v3)
v3: Use the I915_DISPATCH_RS flag to determine if batchbuffer needs resource streamer bit. Cc: Chris Wilson Cc: Daniel Vetter Cc: Ben Widawsky Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ringbuffer.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c246727..f3c9103 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -246,6 +246,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ #define MI_SEMAPHORE_GLOBAL_GTT(1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b67104a..f9564b1 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1654,7 +1654,8 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); + (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW) | + (flags & I915_DISPATCH_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.8.2.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag (v3)
Ensures that the batch buffer is executed by the resource streamer. v3: - Make sure batch is only submitted on render ring and Haswell (Daniel) - Separate EXEC and DISPATCH flags (Chris) - Update __I915_EXEC_UNKNOWN_FLAGS (Kenneth) Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +++ drivers/gpu/drm/i915/intel_ringbuffer.h| 1 + include/uapi/drm/i915_drm.h| 7 ++- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..833677d 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -957,6 +957,13 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; + if (args->flags & I915_EXEC_RESOURCE_STREAMER) { + if ((args->flags & I915_EXEC_RING_MASK) != I915_EXEC_RENDER || + !IS_HASWELL(dev)) + return -EINVAL; + + flags |= I915_DISPATCH_RS; + } switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index 71a73f4..bd74427 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -110,6 +110,7 @@ struct intel_ring_buffer { unsigned flags); #define I915_DISPATCH_SECURE 0x1 #define I915_DISPATCH_PINNED 0x2 +#define I915_DISPATCH_RS 0x4 void(*cleanup)(struct intel_ring_buffer *ring); int (*sync_to)(struct intel_ring_buffer *ring, struct intel_ring_buffer *to, diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a4e97b..edd6e31 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -731,7 +731,12 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) -#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<13) + +#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER <<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) #define i915_execbuffer2_set_context_id(eb2, context) \ -- 1.8.2.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START
Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c |7 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index c246727..f3c9103 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -246,6 +246,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ #define MI_SEMAPHORE_GLOBAL_GTT(1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b67104a..c5dd71b 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1647,14 +1647,15 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, unsigned flags) { int ret; + int ring_emit_flags = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | + (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW); ret = intel_ring_begin(ring, 2); if (ret) return ret; - intel_ring_emit(ring, - MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); + intel_ring_emit(ring, ring_emit_flags | (flags & I915_EXEC_RESOURCE_STREAMER ? +MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag
Ensures that the batch buffer is executed by the resource streamer. Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 ++ include/uapi/drm/i915_drm.h|5 + 2 files changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 0ce0d47..4a56c58 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -962,6 +962,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, case I915_EXEC_DEFAULT: case I915_EXEC_RENDER: ring = &dev_priv->ring[RCS]; + flags |= (args->flags & I915_EXEC_RESOURCE_STREAMER) ? + I915_EXEC_RESOURCE_STREAMER : 0; break; case I915_EXEC_BSD: ring = &dev_priv->ring[VCS]; diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 3a4e97b..5a4bd16 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -731,6 +731,11 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_HANDLE_LUT (1<<12) +/** Tell the kernel that the batchbuffer is processed by + * the resource streamer. + */ +#define I915_EXEC_RESOURCE_STREAMER (1<<13) + #define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_HANDLE_LUT<<1) #define I915_EXEC_CONTEXT_ID_MASK (0x) -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] drm/i915/hsw: Enable resource streamer (v2)
v2 of drm-i915 part of resource streamer enabling. Re-submitted finally now that the Mesa portions are starting to take shape. I also addressed some of the comments from Daniel and Chris from the previous implementation. Cc: Daniel Vetter Cc: Chris Wilson Abdiel Janulgue (2): drm/i915/hsw: Add I915_EXEC_RESOURCE_STREAMER flag drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START -- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++ drivers/gpu/drm/i915/i915_reg.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 7 --- include/uapi/drm/i915_drm.h| 5 + 4 files changed, 12 insertions(+), 3 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 2/2] drm/i915: Prevent loading of uninitialized context garbage
On Wednesday, August 21, 2013 11:12:36 PM Daniel Vetter wrote: > On Wed, Aug 21, 2013 at 06:31:07PM +0300, Ville Syrjälä wrote: > > On Wed, Aug 21, 2013 at 04:43:33PM +0300, Ville Syrjälä wrote: > > > On Thu, Aug 08, 2013 at 08:00:26PM +0100, Chris Wilson wrote: > > > > The extended state bits are stored in the LCA register and affect all > > > > updates to the LCA register - i.e. the state on the old context is > > > > saved > > > > when SAVE_EX_STATE_EN is currently set in the old context address > > > > before > > > > the update, and the new context is restored when RESTORE_EX_STATE_EN > > > > is > > > > set in the new context address. This is irrespective of the > > > > RESTORE_INHIBIT flag in the MI_SET_CONTEXT. > > > > > > > > Hence, upon initial loading the contents of the extended state is read > > > > from uninitialised data. To workaround this, on first load we do a > > > > dummy > > > > load without the mandatory RESTORE_EX_STATE_EN bit so that the real > > > > load > > > > causes us to initialise the extended state of the context before it is > > > > then loaded by the LCA update. > > > > > > > > v2: Split out the introduction of the variable length MI_SET_CONTEXT > > > > command sequence. > > > > > > > > References: https://bugs.freedesktop.org/show_bug.cgi?id=64073 > > > > Signed-off-by: Chris Wilson > > > > Cc: Ben Widawsky > > > > --- > > > > > > > > drivers/gpu/drm/i915/i915_gem_context.c | 18 ++ > > > > 1 file changed, 18 insertions(+) > > > > > > > > diff --git a/drivers/gpu/drm/i915/i915_gem_context.c > > > > b/drivers/gpu/drm/i915/i915_gem_context.c index 8a7b61e..a57d49a > > > > 100644 > > > > --- a/drivers/gpu/drm/i915/i915_gem_context.c > > > > +++ b/drivers/gpu/drm/i915/i915_gem_context.c > > > > @@ -367,6 +367,8 @@ mi_set_context(struct intel_ring_buffer *ring, > > > > > > > > case 5: len += 2; > > > > > > > > break; > > > > > > > > } > > > > > > > > + if (!new_context->is_initialized) > > > > + len += 2; > > > > > > > > ret = intel_ring_begin(ring, len); > > > > if (ret) > > > > > > > > @@ -382,6 +384,22 @@ mi_set_context(struct intel_ring_buffer *ring, > > > > > > > > break; > > > > > > > > } > > > > > > > > + if (!new_context->is_initialized) { > > > > + /* The GPU tries to restore the extended state > > > > irrespective > > > > +* of RestoreInhibit (since it is part of the LCA switch > > > > +* itself rather than the MI_SET_CONTEXT command). > > > > +* Since the initial contents may be garbage we do a > > > > dummy > > > > +* load first then set the mandatory flag for any future > > > > +* ring context switches. > > > > +*/ > > > > + intel_ring_emit(ring, MI_SET_CONTEXT); > > > > + intel_ring_emit(ring, > > > > + > > > > i915_gem_obj_ggtt_offset(new_context->obj) | > > > > + MI_MM_SPACE_GTT | > > > > + MI_SAVE_EXT_STATE_EN | > > > > + hw_flags); > > > > + } > > > > > > Hmm. Couldn't we just do this w/ one MI_SET_CONTEXT? Just drop the > > > MI_RESTORE_EXT_STATE_EN flag if the context is not initialized. The > > > MI_SAVE_EXT_STATE_EN will be saved in the CCID, so when we switch to > > > another context the extended state will be saved. And for the next > > > switch to this context we will set the MI_RESTORE_EXT_STATE_EN bit > > > in MI_SET_CONTEXT so it should get restored. > > > > > > But I must admit BSpec is a bit confusing on the topic. It says the > > > restore bit affects the switch to the context specified in the > > > logical context address. I take that to mean that the effect of the > > > restore bit is immediate. But BSpec also says that the bit is stored in > > > CCID to control the subsequent switch to the same context. So does that > > > actually mean that 'effective.restore_ext = CCID.restore_ext | > > > MI_SET_CONTEXT.restore_ext'? > > > > > > Oh, but BSpec also says that both bits must be set when RS2 power state > > > is enabled. I think that's the same as RC6, or is it? So I guess the > > > hardware might consult these bits when entering/leaving RC6. So I > > > suppose > > > we really need to make sure both bits are always set in case we hit RC6. > > > So based on that reasoning the patch would seem correct. > > > > > > I guess I'll give it an r-b regardless :) > > > > > > Reviewed-by: Ville Syrjälä > > > > I just noticed that on HSW these bits control the resource streamer > > state save/restore. The spec says we should always set the RS > > restore bit if we set the RS save bit. So maybe we need some > > !IS_HASWELL checks in there... > > Looks like we're lucky since we don't have RS support yet ;-) Can you > please poke Abdiel abou
[Intel-gfx] [RFC libdrm PATCH] intel: Add support for resource streamer
Expose defines for resource streamer controls. Based on the work of: Lukasz Anaczkowski Signed-off-by: Abdiel Janulgue --- include/drm/i915_drm.h |1 + 1 file changed, 1 insertion(+) diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index aa983f3..8ddda40 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -651,6 +651,7 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD(2<<0) #define I915_EXEC_BLT(3<<0) #define I915_EXEC_VEBOX (4<<0) +#define I915_EXEC_RS (8<<0) /* Used for switching the constants addressing mode on gen4+ RENDER ring. * Gen6+ only supports relative addressing to dynamic state (default) and -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/2] drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START
Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_reg.h |1 + drivers/gpu/drm/i915/intel_ringbuffer.c |6 -- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e9c50fa..3db1184 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -254,6 +254,7 @@ #define MI_BATCH_NON_SECURE_HSW (1<<13) #define MI_BATCH_BUFFER_START MI_INSTR(0x31, 0) #define MI_BATCH_GTT (2<<6) /* aliased with (1<<7) on gen4 */ +#define MI_BATCH_RESOURCE_STREAMER (1<<10) #define MI_SEMAPHORE_MBOX MI_INSTR(0x16, 1) /* gen6+ */ #define MI_SEMAPHORE_GLOBAL_GTT(1<<22) #define MI_SEMAPHORE_UPDATE (1<<21) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index bc4c11b..9940fb3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1678,14 +1678,16 @@ hsw_ring_dispatch_execbuffer(struct intel_ring_buffer *ring, unsigned flags) { int ret; + int ring_emit_flags = MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | + (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW); ret = intel_ring_begin(ring, 2); if (ret) return ret; intel_ring_emit(ring, - MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | - (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_HSW)); + ring_emit_flags | + (flags & I915_EXEC_RS ? MI_BATCH_RESOURCE_STREAMER : 0)); /* bit0-7 is the length on GEN6+ */ intel_ring_emit(ring, offset); intel_ring_advance(ring); -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/2] drm/i915/hsw: Expose resource streamer control flags
Signed-off-by: Abdiel Janulgue --- drivers/gpu/drm/i915/i915_gem_execbuffer.c |2 ++ include/uapi/drm/i915_drm.h|1 + 2 files changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 5aeb447..3d6d3c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -865,6 +865,8 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data, } if (args->flags & I915_EXEC_IS_PINNED) flags |= I915_DISPATCH_PINNED; + if (args->flags & I915_EXEC_RS) + flags |= I915_EXEC_RS; switch (args->flags & I915_EXEC_RING_MASK) { case I915_EXEC_DEFAULT: diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h index 923ed7f..728f544 100644 --- a/include/uapi/drm/i915_drm.h +++ b/include/uapi/drm/i915_drm.h @@ -661,6 +661,7 @@ struct drm_i915_gem_execbuffer2 { #define I915_EXEC_BSD(2<<0) #define I915_EXEC_BLT(3<<0) #define I915_EXEC_VEBOX (4<<0) +#define I915_EXEC_RS (8<<0) /* Used for switching the constants addressing mode on gen4+ RENDER ring. * Gen6+ only supports relative addressing to dynamic state (default) and -- 1.7.9.5 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] RFC: drm/1915: Resource streamer initial support
Daniel Vetter suggested at some point we need to implement getparam ioctl so userspace can figure out whether kernel supports RS at runtime. For now, this will do to support the corresponding RFC mesa patches. Based on the work of: Lukasz Anaczkowski Abdiel Janulgue (2): drm/i915/hsw: Expose resource streamer control flags drm/i915/hsw: Enable resource streamer bit on MI_BATCH_BUFFER_START [abj@hasbox drm-intel]$ git diff bc32705a7686370b2e50d467b001d88c2c059e39 --stat drivers/gpu/drm/i915/i915_gem_execbuffer.c | 2 ++ drivers/gpu/drm/i915/i915_reg.h| 1 + drivers/gpu/drm/i915/intel_ringbuffer.c| 6 -- include/uapi/drm/i915_drm.h| 1 + 4 files changed, 8 insertions(+), 2 deletions(-) ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx