NEWS | 66 ++++++++ configure.ac | 20 ++ src/intel.h | 1 src/intel_display.c | 29 +++ src/intel_dri.c | 13 + src/intel_driver.c | 12 + src/intel_module.c | 6 src/intel_options.c | 2 src/sna/fb/fbblt.c | 2 src/sna/gen2_render.c | 27 ++- src/sna/gen3_render.c | 6 src/sna/gen4_render.c | 70 +++++---- src/sna/gen5_render.c | 36 +++- src/sna/gen6_render.c | 41 +++-- src/sna/gen7_render.c | 12 - src/sna/kgem.c | 124 +++++++++++++--- src/sna/kgem.h | 17 +- src/sna/sna.h | 22 ++ src/sna/sna_accel.c | 330 ++++++++++--------------------------------- src/sna/sna_blt.c | 11 + src/sna/sna_composite.c | 16 -- src/sna/sna_damage.c | 1 src/sna/sna_display.c | 161 ++++++++++++-------- src/sna/sna_dri.c | 43 +++++ src/sna/sna_driver.c | 58 ++++--- src/sna/sna_render.c | 47 +++--- src/sna/sna_render_inline.h | 25 ++- src/sna/sna_trapezoids.c | 38 ++-- src/sna/sna_video_overlay.c | 1 src/sna/sna_video_sprite.c | 12 + src/sna/sna_video_textured.c | 1 src/xvmc/Makefile.am | 2 32 files changed, 754 insertions(+), 498 deletions(-)
New commits: commit 8f1afde57dca27e6542b0b8e7c87750f3d6367bf Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Nov 11 16:16:20 2012 +0000 2.20.13 release Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/NEWS b/NEWS index 014921d..3d29cfe 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,45 @@ +Release 2.20.13 (2012-11-11) +============================ +Nothing but bug fixes. Many thanks to everyone who took the time to +report their issues, and for their help in improving the driver. + + * Sanity check the platform probe points to our expected i915 device + https://bugs.launchpad.net/ubuntu/+source/xserver-xorg-video-intel/+bug/1069031 + + * Prevent 16-bit overflow for computing the sample area to upload of + sources for render operations + https://bugs.freedesktop.org/show_bug.cgi?id=56324 + + * Clamp the drawable box for migration to prevent 16-bit overflow + https://bugs.freedesktop.org/show_bug.cgi?id=56591 + + * Disable RandR hotplug events if Xinerama is enabled and thereby prevent + a crash upon hotplug + https://bugs.freedesktop.org/show_bug.cgi?id=55260 + + * Call ValidatePicture before attempting to flatten the alphamaps + https://bugs.freedesktop.org/show_bug.cgi?id=56367 + + * Clip the trapezoid correctly if it ends on the boundary pixel + https://bugs.freedesktop.org/show_bug.cgi?id=56395 + + * Make sure the pipeline choice is propagated to the scanline wait + across a batch flush + https://bugs.freedesktop.org/show_bug.cgi?id=47597 + + * Set the valid drawable box when choosing placement of BLT composite ops + https://bugs.freedesktop.org/show_bug.cgi?id=47597 + + * Prevent use-after-free when promoting a partial-GPU bo to a full-GPU bo + https://bugs.freedesktop.org/show_bug.cgi?id=56591 + + * gen4 opacity spans require the per-rectangle workaround + https://bugs.freedesktop.org/show_bug.cgi?id=55500 + + * Prevent use of invalid damage pointers when redirecting rendering + https://bugs.freedesktop.org/show_bug.cgi?id=56785 + + 
Release 2.20.12 (2012-10-20) ============================ More bug reports, more bug fixes! Perhaps the headline feature is diff --git a/configure.ac b/configure.ac index ce3b007..d92269f 100644 --- a/configure.ac +++ b/configure.ac @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-intel], - [2.20.12], + [2.20.13], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-intel]) AC_CONFIG_SRCDIR([Makefile.am]) commit b16219a19f48b52dda91f26fcbbbbeda056589ab Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Nov 11 11:05:35 2012 +0000 sna: Filter out the full-damage marker when undoing redirection ==25902== Invalid read of size 4 ==25902== at 0x4980E13: _list_del (intel_list.h:218) ==25902== by 0x4980EAB: list_del (intel_list.h:240) ==25902== by 0x4981F4B: free_list (sna_damage.c:403) ==25902== by 0x4985131: __sna_damage_destroy (sna_damage.c:1467) ==25902== by 0x49A5276: sna_render_composite_redirect_done (sna_render.c:1921) ==25902== by 0x49C68FC: gen2_render_composite_done (gen2_render.c:1136) ==25902== by 0x497F90F: sna_composite (sna_composite.c:567) ==25902== by 0x4994725: glyphs_via_mask (sna_glyphs.c:1139) ==25902== by 0x4995FB7: sna_glyphs (sna_glyphs.c:1688) ==25902== by 0x8150EB4: ??? (in /usr/bin/Xorg) ==25902== by 0x813CA38: CompositeGlyphs (in /usr/bin/Xorg) ==25902== by 0x8146DE1: ??? 
(in /usr/bin/Xorg) ==25902== Address 0x7c079ac2 is not stack'd, malloc'd or (recently) free'd Reported-by: bonbon...@internet.lu Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 3cb1449..34c795b 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -1914,11 +1914,13 @@ sna_render_composite_redirect_done(struct sna *sna, assert(ok); } if (t->damage) { - DBG(("%s: combining damage, offset=(%d, %d)\n", - __FUNCTION__, t->box.x1, t->box.y1)); - sna_damage_combine(t->real_damage, t->damage, + DBG(("%s: combining damage (all? %d), offset=(%d, %d)\n", + __FUNCTION__, DAMAGE_IS_ALL(t->damage), + t->box.x1, t->box.y1)); + sna_damage_combine(t->real_damage, + DAMAGE_PTR(t->damage), t->box.x1, t->box.y1); - __sna_damage_destroy(t->damage); + __sna_damage_destroy(DAMAGE_PTR(t->damage)); } kgem_bo_destroy(&sna->kgem, op->dst.bo); commit 69acbb77e8aad3370d5e8d9a9e067c54872d7082 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Nov 11 10:49:59 2012 +0000 sna: Fix printing of uninitialised value in DBG ==25902== Use of uninitialised value of size 4 ==25902== at 0x423098E: _itoa_word (_itoa.c:196) ==25902== by 0x4233F7F: vfprintf (vfprintf.c:1602) ==25902== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65) ==25902== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg) ==25902== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg) ==25902== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg) ==25902== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg) ==25902== by 0x81DC333: ErrorF (in /usr/bin/Xorg) ==25902== by 0x49B2FA8: trapezoid_span_inplace__x8r8g8b8 (sna_trapezoids.c:5069) ==25902== by 0x49B3407: trapezoid_span_inplace (sna_trapezoids.c:5166) ==25902== by 0x49B4C96: sna_composite_trapezoids (sna_trapezoids.c:5619) Reported-by: bonbon...@internet.lu Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 Signed-off-by: Chris Wilson 
<ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c index 008ba2e..8f2ea34 100644 --- a/src/sna/sna_trapezoids.c +++ b/src/sna/sna_trapezoids.c @@ -5066,8 +5066,8 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op, pixmap = get_drawable_pixmap(dst->pDrawable); get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); - DBG(("%s: format=%x, op=%d, color=%x\n", - __FUNCTION__, dst->format, op, color)); + DBG(("%s: format=%x, op=%d, lerp?=%d\n", + __FUNCTION__, dst->format, op, lerp)); if (lerp) { struct inplace inplace; commit 66e4c8ff40ab8cf722efa4293bb17b0d8f2dfa88 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sun Nov 11 09:40:09 2012 +0000 sna: Flush pending rendering before enabling an output This is to prevent falling in the trap of the rendering being delayed until the next client renders some new content. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c index 87acb5d..d384bb2 100644 --- a/src/sna/sna_display.c +++ b/src/sna/sna_display.c @@ -1251,6 +1251,8 @@ retry: /* Attach per-crtc pixmap or direct */ if (bo == NULL) return FALSE; + kgem_bo_submit(&sna->kgem, bo); + sna_crtc->bo = bo; mode_to_kmode(&sna_crtc->kmode, mode); if (!sna_crtc_apply(crtc)) { commit 94dd0b9ee9f55e7c09b8c0ee18939fa69ce66da2 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Nov 10 16:52:09 2012 +0000 sna/gen2: Fix use of uninitialised redirection ==29553== Invalid read of size 4 ==29553== at 0x4980E1B: _list_del (intel_list.h:218) ==29553== by 0x4980EB3: list_del (intel_list.h:240) ==29553== by 0x4981F53: free_list (sna_damage.c:403) ==29553== by 0x4985139: __sna_damage_destroy (sna_damage.c:1467) ==29553== by 0x49A527E: sna_render_composite_redirect_done (sna_render.c:1921) ==29553== by 0x49C6904: gen2_render_composite_done (gen2_render.c:1136) ==29553== by 0x497F917: sna_composite (sna_composite.c:567) ==29553== by 0x8150C41: ??? 
(in /usr/bin/Xorg) ==29553== by 0x8142F13: CompositePicture (in /usr/bin/Xorg) ==29553== by 0x8145F58: ??? (in /usr/bin/Xorg) ==29553== by 0x81436F2: ??? (in /usr/bin/Xorg) ==29553== by 0x807965C: ??? (in /usr/bin/Xorg) ==29553== Address 0x9407e188 is not stack'd, malloc'd or (recently) free'd Reported-by: bonbon...@internet.lu Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56785 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 6e51c18..9663dff 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -1803,6 +1803,8 @@ gen2_render_composite(struct sna *sna, } tmp->op = op; + + sna_render_composite_redirect_init(tmp); if (too_large(tmp->dst.width, tmp->dst.height) || tmp->dst.bo->pitch > MAX_3D_PITCH) { if (!sna_render_composite_redirect(sna, tmp, @@ -2298,6 +2300,8 @@ gen2_render_composite_spans(struct sna *sna, } tmp->base.op = op; + + sna_render_composite_redirect_init(&tmp->base); if (too_large(tmp->base.dst.width, tmp->base.dst.height) || tmp->base.dst.bo->pitch > MAX_3D_PITCH) { if (!sna_render_composite_redirect(sna, &tmp->base, commit 0f1c30818c9d782b066147448bbcc9ac95ac834f Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Nov 10 16:52:09 2012 +0000 sna: Fix use of uninitialised value in DBG ==29553== Use of uninitialised value of size 4 ==29553== at 0x4230964: _itoa_word (_itoa.c:195) ==29553== by 0x4233F7F: vfprintf (vfprintf.c:1602) ==29553== by 0x42FAFAD: __vsnprintf_chk (vsnprintf_chk.c:65) ==29553== by 0x81DBE8E: Xvscnprintf (in /usr/bin/Xorg) ==29553== by 0x81DC8FB: LogVMessageVerb (in /usr/bin/Xorg) ==29553== by 0x81DCA62: LogVWrite (in /usr/bin/Xorg) ==29553== by 0x81DCA9B: VErrorF (in /usr/bin/Xorg) ==29553== by 0x81DC333: ErrorF (in /usr/bin/Xorg) ==29553== by 0x49434F0: kgem_create_buffer (kgem.c:4887) ==29553== by 0x4943B09: kgem_create_buffer_2d (kgem.c:4969) ==29553== by 0x4943E19: kgem_upload_source_image (kgem.c:5021) ==29553== by 0x49A0567: 
upload (sna_render.c:505) ==29553== Reported-by: bonbon...@internet.lu References: https://bugs.freedesktop.org/show_bug.cgi?id=56785 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 28e69c3..4fb8a6f 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -4885,7 +4885,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, } DBG(("%s: created handle=%d for buffer\n", - __FUNCTION__, bo->base.handle)); + __FUNCTION__, handle)); __kgem_bo_init(&bo->base, handle, alloc); debug_alloc(kgem, alloc * PAGE_SIZE); commit cc2b13c9c05e57dc5004d93b56f332ea95f0a4ef Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Nov 10 11:50:15 2012 +0000 sna: Specify read/write domains for no-relocation fastpath On review (read triggering BUGs), we do need to supply the domain tracking of the buffers that is being replaced from the relocation path. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index ea56adf..28e69c3 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -114,8 +114,6 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_EXEC_NO_RELOC (1<<10) #define LOCAL_I915_EXEC_HANDLE_LUT (1<<11) -#define LOCAL_EXEC_OBJECT_WRITE (1<<1) - #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) struct local_i915_gem_userptr { @@ -2315,7 +2313,9 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].alignment = 0; kgem->exec[i].offset = rq->bo->presumed_offset; kgem->exec[i].flags = 0; - kgem->exec[i].rsvd1 = 0; + kgem->exec[i].rsvd1 = (I915_GEM_DOMAIN_COMMAND | + I915_GEM_DOMAIN_INSTRUCTION | + I915_GEM_DOMAIN_VERTEX); kgem->exec[i].rsvd2 = 0; rq->bo->target_handle = kgem->has_handle_lut ? 
i : handle; @@ -3925,9 +3925,10 @@ uint32_t kgem_add_reloc(struct kgem *kgem, kgem->reloc[index].target_handle = bo->target_handle; kgem->reloc[index].presumed_offset = bo->presumed_offset; - if (read_write_domain & 0x7ff) { + bo->exec->rsvd1 |= read_write_domain >> 16; + if (read_write_domain & 0x7fff) { assert(!bo->snoop || kgem->can_blt_cpu); - bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; + bo->exec->rsvd1 |= (uint64_t)(read_write_domain & 0x7fff) << 32; kgem_bo_mark_dirty(bo); } @@ -4353,10 +4354,10 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo) void kgem_clear_dirty(struct kgem *kgem) { - struct kgem_request *rq = kgem->next_request; + struct list * const buffers = &kgem->next_request->buffers; struct kgem_bo *bo; - list_for_each_entry(bo, &rq->buffers, request) { + list_for_each_entry(bo, buffers, request) { if (!bo->dirty) break; commit 0c4a2bcc3d63ecc02e3a940e38e9a416b51ad0c8 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Nov 10 12:34:52 2012 +0000 sna: Allow snooped buffers to be retained (and reused) between batches Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index f22febd..ea56adf 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -2023,10 +2023,9 @@ static void kgem_finish_buffers(struct kgem *kgem) used = ALIGN(bo->used + PAGE_SIZE-1, PAGE_SIZE); if (!DBG_NO_UPLOAD_ACTIVE && used + PAGE_SIZE <= bytes(&bo->base) && - (kgem->has_llc || !IS_CPU_MAP(bo->base.map))) { + (kgem->has_llc || !IS_CPU_MAP(bo->base.map) || bo->base.snoop)) { DBG(("%s: retaining upload buffer (%d/%d)\n", __FUNCTION__, bo->used, bytes(&bo->base))); - assert(!bo->base.snoop); bo->used = used; list_move(&bo->base.list, &kgem->active_buffers); @@ -4663,8 +4662,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem, assert(bo->base.io); assert(bo->base.refcnt >= 1); assert(bo->mmapped); - assert(!bo->base.snoop); - assert(!IS_CPU_MAP(bo->base.map) || kgem->has_llc); + assert(!IS_CPU_MAP(bo->base.map) 
|| kgem->has_llc || bo->base.snoop); if ((bo->write & ~flags) & KGEM_BUFFER_INPLACE) { DBG(("%s: skip write %x buffer, need %x\n", commit f5d79b202dd448e61ab6ffce26fe9cbf9051d770 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Sat Nov 10 10:30:04 2012 +0000 sna/gen2: Add a modicum of fallback DBG References: https://bugs.freedesktop.org/show_bug.cgi?id=56785 Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 024b107..6e51c18 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -1816,6 +1816,8 @@ gen2_render_composite(struct sna *sna, dst_x, dst_y, dst->polyMode == PolyModePrecise)) { case -1: + DBG(("%s: fallback -- unable to prepare source\n", + __FUNCTION__)); goto cleanup_dst; case 0: gen2_composite_solid_init(sna, &tmp->src, 0); @@ -1839,6 +1841,8 @@ gen2_render_composite(struct sna *sna, dst_x, dst_y, dst->polyMode == PolyModePrecise)) { case -1: + DBG(("%s: fallback -- unable to prepare mask\n", + __FUNCTION__)); goto cleanup_src; case 0: gen2_composite_solid_init(sna, &tmp->mask, 0); @@ -1855,8 +1859,12 @@ gen2_render_composite(struct sna *sna, tmp->has_component_alpha = true; if (gen2_blend_op[op].src_alpha && (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { - if (op != PictOpOver) - return false; + if (op != PictOpOver) { + DBG(("%s: fallback -- unsupported CA blend (src_blend=%d)\n", + __FUNCTION__, + gen2_blend_op[op].src_blend)); + goto cleanup_dst; + } tmp->need_magic_ca_pass = true; tmp->op = PictOpOutReverse; @@ -1903,8 +1911,11 @@ gen2_render_composite(struct sna *sna, kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, tmp->dst.bo, tmp->src.bo, tmp->mask.bo, - NULL)) + NULL)) { + DBG(("%s: fallback, operation does not fit into GTT\n", + __FUNCTION__)); goto cleanup_mask; + } } gen2_emit_composite_state(sna, tmp); commit 27327633138dce159ca2e91fe5eac1565bd45e1c Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri Nov 9 17:08:01 2012 +0000 sna/gen4: 
Only 965gm suffers the !snoop restriction So fixup the bogus assertion for g4x Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index 6d44a4a..be97458 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -649,7 +649,7 @@ gen4_bind_bo(struct sna *sna, uint32_t domains; uint16_t offset; - assert(!kgem_bo_is_snoop(bo)); + assert(sna->kgem.gen != 40 || !kgem_bo_is_snoop(bo)); /* After the first bind, we manage the cache domains within the batch */ offset = kgem_bo_get_binding(bo, format); commit 8d3b5ea135fd8f16da2cbfb98041e32c7001a38f Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Fri Nov 9 15:31:03 2012 +0000 xvmc: Use DRMINTEL_LIBS instead of hardcoding -ldrm_intel Reported-by: Maarten Lankhorst <maarten.lankho...@canonical.com> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/xvmc/Makefile.am b/src/xvmc/Makefile.am index d3ed449..36a939b 100644 --- a/src/xvmc/Makefile.am +++ b/src/xvmc/Makefile.am @@ -20,4 +20,4 @@ AM_CFLAGS = @XORG_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \ @XVMCLIB_CFLAGS@ -I$(top_srcdir)/src -DTRUE=1 -DFALSE=0 libIntelXvMC_la_LDFLAGS = -version-number 1:0:0 -libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ -lpthread -ldrm_intel +libIntelXvMC_la_LIBADD = @DRI_LIBS@ @DRM_LIBS@ @XVMCLIB_LIBS@ @DRMINTEL_LIBS@ -lpthread commit f040b97b01495aa43f7771ebb8ca5c0d44038bc1 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Nov 8 23:42:10 2012 +0000 sna: Mark no-reloc write buffers If we bypass the relocation processing, we also then bypass the pending-write analysis, so we need to supply those to the kernel ourselves (to maintain gpu-cpu coherency). 
Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index e2c5da8..f22febd 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -114,6 +114,8 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_EXEC_NO_RELOC (1<<10) #define LOCAL_I915_EXEC_HANDLE_LUT (1<<11) +#define LOCAL_EXEC_OBJECT_WRITE (1<<1) + #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) struct local_i915_gem_userptr { @@ -3926,6 +3928,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, if (read_write_domain & 0x7ff) { assert(!bo->snoop || kgem->can_blt_cpu); + bo->exec->flags |= LOCAL_EXEC_OBJECT_WRITE; kgem_bo_mark_dirty(bo); } commit 85ba7e96268dbb8da4bb34078333695a451c6570 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Nov 8 15:56:13 2012 +0000 sna: Experiment with using reloc.handle as an index into the execbuffer Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/configure.ac b/configure.ac index 9ea1e3c..ce3b007 100644 --- a/configure.ac +++ b/configure.ac @@ -301,6 +301,15 @@ if test "x$FASTRELOC" = xyes; then AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support]) fi +AC_ARG_ENABLE(handle-lut, + AS_HELP_STRING([--enable-handle-lut], + [Enable use of "handle LUT" (experimental) [default=no]]), + [HANDLE_LUT="$enableval"], + [HANDLE_LUT=no]) +if test "x$HANDLE_LUT" = xyes; then + AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support]) +fi + AC_ARG_ENABLE(async-swap, AS_HELP_STRING([--enable-async-swap], [Enable use of asynchronous swaps (experimental) [default=no]]), diff --git a/src/sna/kgem.c b/src/sna/kgem.c index e643b85..e2c5da8 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -71,6 +71,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_RELAXED_FENCING 0 #define DBG_NO_SECURE_BATCHES 0 #define DBG_NO_FAST_RELOC 0 
+#define DBG_NO_HANDLE_LUT 0 #define DBG_DUMP 0 #define SHOW_BATCH 0 @@ -80,6 +81,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_FAST_RELOC 1 #endif +#ifndef USE_HANDLE_LUT +#undef DBG_NO_HANDLE_LUT +#define DBG_NO_HANDLE_LUT 1 +#endif + /* Worst case seems to be 965gm where we cannot write within a cacheline that * is being simultaneously being read by the GPU, or within the sampler * prefetch. In general, the chipsets seem to have a requirement that sampler @@ -103,8 +109,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 #define LOCAL_I915_PARAM_HAS_NO_RELOC 24 +#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 25 #define LOCAL_I915_EXEC_NO_RELOC (1<<10) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11) #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) @@ -668,6 +676,14 @@ static bool test_has_no_reloc(struct kgem *kgem) return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; } +static bool test_has_handle_lut(struct kgem *kgem) +{ + if (DBG_NO_HANDLE_LUT) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; +} + static bool test_has_semaphores_enabled(struct kgem *kgem) { FILE *file; @@ -859,6 +875,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) DBG(("%s: has no-reloc? %d\n", __FUNCTION__, kgem->has_no_reloc)); + kgem->has_handle_lut = test_has_handle_lut(kgem); + DBG(("%s: has handle-lut? 
%d\n", __FUNCTION__, + kgem->has_handle_lut)); + kgem->has_semaphores = false; if (kgem->has_blt && test_has_semaphores_enabled(kgem)) kgem->has_semaphores = true; @@ -1212,6 +1232,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, kgem->nexec)); assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle; exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); exec->handle = bo->handle; exec->offset = bo->presumed_offset; @@ -1246,8 +1267,8 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) int n; for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == 0) { - kgem->reloc[n].target_handle = bo->handle; + if (kgem->reloc[n].target_handle == ~0U) { + kgem->reloc[n].target_handle = bo->target_handle; kgem->reloc[n].presumed_offset = bo->presumed_offset; kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = kgem->reloc[n].delta + bo->presumed_offset; @@ -2047,9 +2068,11 @@ static void kgem_finish_buffers(struct kgem *kgem) gem_write(kgem->fd, shrink->handle, 0, bo->used, bo->mem); + shrink->target_handle = + kgem->has_handle_lut ? 
bo->base.target_handle : shrink->handle; for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == bo->base.handle) { - kgem->reloc[n].target_handle = shrink->handle; + if (kgem->reloc[n].target_handle == bo->base.target_handle) { + kgem->reloc[n].target_handle = shrink->target_handle; kgem->reloc[n].presumed_offset = shrink->presumed_offset; kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = kgem->reloc[n].delta + shrink->presumed_offset; @@ -2202,6 +2225,8 @@ void kgem_reset(struct kgem *kgem) kgem->batch_flags = 0; if (kgem->has_no_reloc) kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC; + if (kgem->has_handle_lut) + kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT; kgem->next_request = __kgem_request_alloc(); @@ -2227,7 +2252,7 @@ static int compact_batch_surface(struct kgem *kgem) shrink *= sizeof(uint32_t); for (n = 0; n < kgem->nreloc; n++) { if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION && - kgem->reloc[n].target_handle == 0) + kgem->reloc[n].target_handle == ~0U) kgem->reloc[n].delta -= shrink; if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) @@ -2292,6 +2317,7 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].rsvd1 = 0; kgem->exec[i].rsvd2 = 0; + rq->bo->target_handle = kgem->has_handle_lut ? 
i : handle; rq->bo->exec = &kgem->exec[i]; rq->bo->rq = rq; /* useful sanity check */ list_add(&rq->bo->request, &rq->buffers); @@ -3895,7 +3921,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, } kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = bo->handle; + kgem->reloc[index].target_handle = bo->target_handle; kgem->reloc[index].presumed_offset = bo->presumed_offset; if (read_write_domain & 0x7ff) { @@ -3906,7 +3932,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, delta += bo->presumed_offset; } else { kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = 0; + kgem->reloc[index].target_handle = ~0U; kgem->reloc[index].presumed_offset = 0; } kgem->reloc[index].read_domains = read_write_domain >> 16; diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 8789b55..b42a8e0 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -64,6 +64,7 @@ struct kgem_bo { uint32_t unique_id; uint32_t refcnt; uint32_t handle; + uint32_t target_handle; uint32_t presumed_offset; uint32_t delta; union { @@ -165,6 +166,7 @@ struct kgem { uint32_t has_cacheing :1; uint32_t has_llc :1; uint32_t has_no_reloc :1; + uint32_t has_handle_lut :1; uint32_t can_blt_cpu :1; commit 93d8dddbb92431d6e2c48a17b71cac9f7047902e Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Nov 8 09:41:21 2012 +0000 sna: Set the known offset for the batch as well Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 131a209..e643b85 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -2287,7 +2287,7 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].relocation_count = kgem->nreloc; kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; kgem->exec[i].alignment = 0; - kgem->exec[i].offset = 0; + kgem->exec[i].offset = rq->bo->presumed_offset; kgem->exec[i].flags = 0; kgem->exec[i].rsvd1 = 0; kgem->exec[i].rsvd2 = 0; commit 120fa0ef8d04f5e82e5f7a0636033d3d96efa1e8 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Wed Nov 7 
17:41:20 2012 +0000 sna: Support a fast no relocation changed path x11perf -copywinwin10 on gm45 with c2d L9400: before: 553,000 op/s after: 565,000 op/s Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/configure.ac b/configure.ac index 8ddf40b..9ea1e3c 100644 --- a/configure.ac +++ b/configure.ac @@ -292,6 +292,15 @@ if test "x$USERPTR" = xyes; then AC_DEFINE(USE_USERPTR,1,[Assume USERPTR support]) fi +AC_ARG_ENABLE(fast-reloc, + AS_HELP_STRING([--enable-fast-reloc], + [Enable use of "fast reloc" (experimental) [default=no]]), + [FASTRELOC="$enableval"], + [FASTRELOC=no]) +if test "x$FASTRELOC" = xyes; then + AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support]) +fi + AC_ARG_ENABLE(async-swap, AS_HELP_STRING([--enable-async-swap], [Enable use of asynchronous swaps (experimental) [default=no]]), diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 9c01694..131a209 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -70,10 +70,16 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_MAP_UPLOAD 0 #define DBG_NO_RELAXED_FENCING 0 #define DBG_NO_SECURE_BATCHES 0 +#define DBG_NO_FAST_RELOC 0 #define DBG_DUMP 0 #define SHOW_BATCH 0 +#ifndef USE_FASTRELOC +#undef DBG_NO_FAST_RELOC +#define DBG_NO_FAST_RELOC 1 +#endif + /* Worst case seems to be 965gm where we cannot write within a cacheline that * is being simultaneously being read by the GPU, or within the sampler * prefetch. 
In general, the chipsets seem to have a requirement that sampler @@ -96,6 +102,9 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 +#define LOCAL_I915_PARAM_HAS_NO_RELOC 24 + +#define LOCAL_I915_EXEC_NO_RELOC (1<<10) #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) @@ -651,6 +660,14 @@ static bool test_has_execbuffer2(struct kgem *kgem) errno == EFAULT); } +static bool test_has_no_reloc(struct kgem *kgem) +{ + if (DBG_NO_FAST_RELOC) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; +} + static bool test_has_semaphores_enabled(struct kgem *kgem) { FILE *file; @@ -838,6 +855,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) DBG(("%s: has userptr? %d\n", __FUNCTION__, kgem->has_userptr)); + kgem->has_no_reloc = test_has_no_reloc(kgem); + DBG(("%s: has no-reloc? 
%d\n", __FUNCTION__, + kgem->has_no_reloc)); + kgem->has_semaphores = false; if (kgem->has_blt && test_has_semaphores_enabled(kgem)) kgem->has_semaphores = true; @@ -2177,8 +2198,10 @@ void kgem_reset(struct kgem *kgem) kgem->nbatch = 0; kgem->surface = kgem->batch_size; kgem->mode = KGEM_NONE; - kgem->batch_flags = 0; kgem->flush = 0; + kgem->batch_flags = 0; + if (kgem->has_no_reloc) + kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC; kgem->next_request = __kgem_request_alloc(); diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 7e48db5..8789b55 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -164,6 +164,7 @@ struct kgem { uint32_t has_secure_batches :1; uint32_t has_cacheing :1; uint32_t has_llc :1; + uint32_t has_no_reloc :1; uint32_t can_blt_cpu :1; commit b7d2fcf47a9569d0944097a8be60ca3be72b42f6 Author: Chris Wilson <ch...@chris-wilson.co.uk> Date: Thu Nov 8 08:55:25 2012 +0000 Remove reliance on hard-coded DRI name This provides for using the existing DDX with future DRI drivers which may break from the traditional names - but only with the help of the user/packager. This scheme needs to be replaced with a robust mechanism for driver loading if AIGLX and co are to be kept. Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk> diff --git a/src/intel_dri.c b/src/intel_dri.c index 867a465..17d9d50 100644 --- a/src/intel_dri.c +++ b/src/intel_dri.c @@ -1515,6 +1515,17 @@ out_complete: static int dri2_server_generation; #endif +static const char *dri_driver_name(intel_screen_private *intel) +{ + const char *s = xf86GetOptValString(intel->Options, OPTION_DRI); + Bool dummy; + + if (s == NULL || xf86getBoolValue(&dummy, s)) + return INTEL_INFO(intel)->gen < 40 ? 
"i915" : "i965"; + + return s; +} + Bool I830DRI2ScreenInit(ScreenPtr screen) { ScrnInfoPtr scrn = xf86ScreenToScrn(screen); @@ -1564,7 +1575,7 @@ Bool I830DRI2ScreenInit(ScreenPtr screen) intel->deviceName = drmGetDeviceNameFromFd(intel->drmSubFD); memset(&info, '\0', sizeof(info)); info.fd = intel->drmSubFD; - info.driverName = INTEL_INFO(intel)->gen < 40 ? "i915" : "i965"; + info.driverName = dri_driver_name(intel); info.deviceName = intel->deviceName; #if DRI2INFOREC_VERSION == 1 diff --git a/src/intel_driver.c b/src/intel_driver.c index 3029b22..254aafa 100644 --- a/src/intel_driver.c +++ b/src/intel_driver.c @@ -221,11 +221,19 @@ static Bool I830GetEarlyOptions(ScrnInfoPtr scrn) return TRUE; } +static Bool intel_option_cast_string_to_bool(intel_screen_private *intel, + int id, Bool val) +{ + xf86getBoolValue(&val, xf86GetOptValString(intel->Options, id)); + return val; +} + static void intel_check_dri_option(ScrnInfoPtr scrn) { intel_screen_private *intel = intel_get_screen_private(scrn); + intel->directRenderingType = DRI_NONE; - if (!xf86ReturnOptValBool(intel->Options, OPTION_DRI, TRUE)) + if (!intel_option_cast_string_to_bool(intel, OPTION_DRI, TRUE)) intel->directRenderingType = DRI_DISABLED; if (scrn->depth != 16 && scrn->depth != 24 && scrn->depth != 30) { diff --git a/src/intel_options.c b/src/intel_options.c index dcab9e7..443e84d 100644 --- a/src/intel_options.c +++ b/src/intel_options.c @@ -8,7 +8,7 @@ const OptionInfoRec intel_options[] = { {OPTION_ACCEL_DISABLE, "NoAccel", OPTV_BOOLEAN, {0}, 0}, {OPTION_ACCEL_METHOD, "AccelMethod", OPTV_STRING, {0}, 0}, {OPTION_BACKLIGHT, "Backlight", OPTV_STRING, {0}, 0}, - {OPTION_DRI, "DRI", OPTV_BOOLEAN, {0}, 1}, + {OPTION_DRI, "DRI", OPTV_STRING, {0}, 0}, {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, 0}, {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, 0}, {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, 1}, diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c index 83c79c1..23d9572 100644 --- 
a/src/sna/sna_dri.c +++ b/src/sna/sna_dri.c @@ -41,6 +41,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "sna.h" #include "sna_reg.h" +#include "intel_options.h" #include <xf86drm.h> #include <i915_drm.h> @@ -2317,6 +2318,17 @@ out_complete: } #endif +static const char *dri_driver_name(struct sna *sna) +{ + const char *s = xf86GetOptValString(sna->Options, OPTION_DRI); + Bool dummy; + + if (s == NULL || xf86getBoolValue(&dummy, s)) + return (sna->kgem.gen && sna->kgem.gen < 40) ? "i915" : "i965"; + + return s; +} + bool sna_dri_open(struct sna *sna, ScreenPtr screen) { DRI2InfoRec info; @@ -2344,8 +2356,7 @@ bool sna_dri_open(struct sna *sna, ScreenPtr screen) sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd); memset(&info, '\0', sizeof(info)); info.fd = sna->kgem.fd; - info.driverName = - (sna->kgem.gen && sna->kgem.gen < 40) ? "i915" : "i965"; + info.driverName = dri_driver_name(sna); info.deviceName = sna->deviceName; DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c index 055c71c..f214b90 100644 --- a/src/sna/sna_driver.c +++ b/src/sna/sna_driver.c @@ -363,6 +363,12 @@ static void sna_setup_capabilities(ScrnInfoPtr scrn, int fd) #endif } +static Bool sna_option_cast_to_bool(struct sna *sna, int id, Bool val) +{ + xf86getBoolValue(&val, xf86GetOptValString(sna->Options, id)); + return val; +} + -- To UNSUBSCRIBE, email to debian-x-requ...@lists.debian.org with a subject of "unsubscribe". Trouble? Contact listmas...@lists.debian.org Archive: http://lists.debian.org/e1talkr-0006hz...@vasks.debian.org