VERSION | 2 configure.ac | 4 debian/changelog | 7 debian/control | 4 debian/rules | 2 docs/relnotes/11.2.0.html | 3 docs/relnotes/11.2.1.html | 118 ++++++++ include/D3D9/d3d9types.h | 16 - src/compiler/glsl/glsl_lexer.ll | 2 src/compiler/glsl/lower_variable_index_to_cond_assign.cpp | 20 + src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp | 2 src/gallium/drivers/radeonsi/si_state.c | 2 src/gallium/state_trackers/va/context.c | 4 src/gallium/state_trackers/va/image.c | 4 src/mesa/drivers/dri/common/drirc | 8 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 14 src/mesa/drivers/dri/i965/gen7_blorp.cpp | 5 src/mesa/drivers/dri/i965/intel_tiled_memcpy.c | 198 ++++++-------- src/mesa/state_tracker/st_cb_drawpixels.c | 23 + src/mesa/state_tracker/st_cb_fbo.c | 2 21 files changed, 319 insertions(+), 131 deletions(-)
New commits: commit abf028947440577413c49feffbb15192701d991e Author: Timo Aaltonen <tjaal...@debian.org> Date: Mon Apr 18 11:25:26 2016 +0300 release to experimental diff --git a/debian/changelog b/debian/changelog index 0328b2c..9c11bba 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,9 +1,9 @@ -mesa (11.2.1-1) UNRELEASED; urgency=medium +mesa (11.2.1-1) experimental; urgency=medium * New upstream release. * control, rules: Use llvm/clang 3.8. - -- Timo Aaltonen <tjaal...@debian.org> Mon, 18 Apr 2016 11:20:48 +0300 + -- Timo Aaltonen <tjaal...@debian.org> Mon, 18 Apr 2016 11:25:00 +0300 mesa (11.2.0-1) experimental; urgency=medium commit 436b3472adde14b22e9ce204820dab417cfe00c6 Author: Timo Aaltonen <tjaal...@debian.org> Date: Mon Apr 18 11:24:58 2016 +0300 control, rules: Use llvm/clang 3.8. diff --git a/debian/changelog b/debian/changelog index cf98bea..0328b2c 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ mesa (11.2.1-1) UNRELEASED; urgency=medium * New upstream release. + * control, rules: Use llvm/clang 3.8. -- Timo Aaltonen <tjaal...@debian.org> Mon, 18 Apr 2016 11:20:48 +0300 diff --git a/debian/control b/debian/control index e495555..19c802f 100644 --- a/debian/control +++ b/debian/control @@ -41,10 +41,10 @@ Build-Depends: libudev-dev [linux-any], flex, bison, - llvm-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el], + llvm-3.8-dev (>= 1:3.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el], libelf-dev [amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el], libwayland-dev (>= 1.2.0) [linux-any], - libclang-3.7-dev (>= 1:3.7~+rc2) [amd64 i386 armhf], + libclang-3.8-dev (>= 1:3.8) [amd64 i386 armhf], libclc-dev (>= 0.2.0+git20150813) [amd64 i386 armhf], Vcs-Git: https://anonscm.debian.org/git/pkg-xorg/lib/mesa.git Vcs-Browser: https://anonscm.debian.org/cgit/pkg-xorg/lib/mesa.git diff --git a/debian/rules b/debian/rules index cf40811..ff216dd 100755 --- a/debian/rules +++ b/debian/rules @@ -89,7 +89,7 @@ else ifneq (,$(filter $(DEB_HOST_ARCH),amd64 i386 kfreebsd-amd64 kfreebsd-i386 armhf ppc64el)) GALLIUM_DRIVERS += radeonsi confflags_GALLIUM += --enable-gallium-llvm - confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.7 + confflags_GALLIUM += ac_cv_path_LLVM_CONFIG=llvm-config-3.8 GALLIUM_DRIVERS += swrast else DRI_DRIVERS += swrast commit 6faab1eb3ae95da1b71883be526ba520d2ad36f9 Author: Timo Aaltonen <tjaal...@debian.org> Date: Mon Apr 18 11:21:58 2016 +0300 bump changelog diff --git a/debian/changelog b/debian/changelog index 76bb0a3..cf98bea 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +mesa (11.2.1-1) UNRELEASED; urgency=medium + + * New upstream release. + + -- Timo Aaltonen <tjaal...@debian.org> Mon, 18 Apr 2016 11:20:48 +0300 + mesa (11.2.0-1) experimental; urgency=medium [ Andreas Boll ] commit 21e6440e82808364a6c2cc38ea92651c99b69aad Author: Emil Velikov <emil.veli...@collabora.com> Date: Sun Apr 17 16:03:34 2016 +0100 docs: add release notes for 11.2.1 Signed-off-by: Emil Velikov <emil.veli...@collabora.com> diff --git a/docs/relnotes/11.2.1.html b/docs/relnotes/11.2.1.html new file mode 100644 index 0000000..3100ebb --- /dev/null +++ b/docs/relnotes/11.2.1.html @@ -0,0 +1,118 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html lang="en"> +<head> + <meta http-equiv="content-type" content="text/html; charset=utf-8"> + <title>Mesa Release Notes</title> + <link rel="stylesheet" type="text/css" href="../mesa.css"> +</head> +<body> + +<div class="header"> + <h1>The Mesa 3D Graphics Library</h1> +</div> + +<iframe src="../contents.html"></iframe> +<div class="content"> + +<h1>Mesa 11.2.1 Release Notes / April 17, 2016</h1> + +<p> +Mesa 11.2.1 is a bug fix release which fixes bugs found since the 11.2.0 release. +</p> +<p> +Mesa 11.2.1 implements the OpenGL 4.1 API, but the version reported by +glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) / +glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used. +Some drivers don't support all the features required in OpenGL 4.1. OpenGL +4.1 is <strong>only</strong> available if requested at context creation +because compatibility contexts are not supported. +</p> + + +<h2>SHA256 checksums</h2> +<pre> +TBD +</pre> + + +<h2>New features</h2> +<p>None</p> + +<h2>Bug fixes</h2> + +<p>This list is likely incomplete.</p> + +<ul> + +<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93962">Bug 93962</a> - [HSW, regression, bisected, CTS] ES2-CTS.gtf.GL2FixedTests.scissor.scissor - segfault/asserts</li> + +</ul> + + +<h2>Changes</h2> + +<p>Brian Paul (2):</p> +<ul> + <li>st/mesa: fix glReadBuffer() assertion failure</li> + <li>st/mesa: fix memleak in glDrawPixels cache code</li> +</ul> + +<p>Christian Schmidbauer (1):</p> +<ul> + <li>st/nine: specify WINAPI only for i386 and amd64</li> +</ul> + +<p>Emil Velikov (3):</p> +<ul> + <li>docs: add sha256 checksums for 11.2.0</li> + <li>configure.ac: update the path of the generated files</li> + <li>Update version to 11.2.1</li> +</ul> + +<p>Ilia Mirkin (1):</p> +<ul> + <li>glsl: allow usage of the keyword buffer before GLSL 430 / ESSL 310</li> +</ul> + +<p>Iurie Salomov (1):</p> +<ul> + <li>va: check null context in vlVaDestroyContext</li> +</ul> + +<p>Jason Ekstrand (2):</p> +<ul> + <li>i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions</li> + <li>i965/tiled_memcpy: Rework the RGBA -> BGRA mem_copy functions</li> +</ul> + +<p>Kenneth Graunke (3):</p> +<ul> + <li>i965: Fix textureSize() depth value for 1 layer surfaces on Gen4-6.</li> + <li>i965: Use brw->urb.min_vs_urb_entries instead of 32 for BLORP.</li> + <li>glsl: Lower variable indexing of system value arrays unconditionally.</li> +</ul> + +<p>Marek Olšák (1):</p> +<ul> + <li>drirc: add a workaround for blackness in Warsow</li> +</ul> + +<p>Nicolai Hähnle (1):</p> +<ul> + <li>radeonsi: fix bounds check in si_create_vertex_elements</li> +</ul> + +<p>Samuel Pitoiset (1):</p> +<ul> + <li>nv50/ir: do not try to attach JOIN ops to ATOM</li> +</ul> + +<p>Thomas Hindoe Paaboel Andersen (1):</p> +<ul> + <li>st/va: avoid dereference after free in vlVaDestroyImage</li> +</ul> + + +</div> +</body> +</html> commit 696614cbff849dc3644a35307d3f82ea1ec66266 Author: Emil Velikov <emil.veli...@collabora.com> Date: Sun Apr 17 14:51:55 2016 +0100 Update version to 11.2.1 Signed-off-by: Emil Velikov <emil.veli...@collabora.com> diff --git a/VERSION b/VERSION index b85c6c7..dc170a1 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -11.2.0 +11.2.1 commit 106c1facae8f8e2e27d670397d1276958f0143c4 Author: Iurie Salomov <iur...@gmail.com> Date: Tue Apr 12 23:24:30 2016 +0100 va: check null context in vlVaDestroyContext Signed-off-by: Iurie Salomov <iur...@gmail.com> Reviewed-by: Julien Isorce <j.iso...@samsung.com> (cherry picked from commit 047e3264f67bc54365be7b0e163b6910a9e9de3a) Nominated-by: Emil Velikov <emil.veli...@collabora.com> diff --git a/src/gallium/state_trackers/va/context.c b/src/gallium/state_trackers/va/context.c index b25c381..25d587a 100644 --- a/src/gallium/state_trackers/va/context.c +++ b/src/gallium/state_trackers/va/context.c @@ -283,6 +283,10 @@ vlVaDestroyContext(VADriverContextP ctx, VAContextID context_id) drv = VL_VA_DRIVER(ctx); pipe_mutex_lock(drv->mutex); context = handle_table_get(drv->htab, context_id); + if (!context) { + pipe_mutex_unlock(drv->mutex); + return VA_STATUS_ERROR_INVALID_CONTEXT; + } if (context->decoder) { if (u_reduce_video_profile(context->decoder->profile) == commit fbdd845ed213871c221f1076ea83621ebe779f4e Author: Nicolai Hähnle <nicolai.haeh...@amd.com> Date: Tue Apr 12 12:23:31 2016 -0500 radeonsi: fix bounds check in si_create_vertex_elements This was triggered by dEQP-GLES3.functional.vertex_array_objects.all_attributes Cc: "11.1 11.2" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Bas Nieuwenhuizen <b...@basnieuwenhuizen.nl> Reviewed-by: Marek Olšák <marek.ol...@amd.com> (cherry picked from commit a191e6b719848a17963f185954f1696fa5a2bcb1) diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 663ebb3..58573ac 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3284,7 +3284,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, struct si_vertex_element *v = CALLOC_STRUCT(si_vertex_element); int i; - assert(count < SI_MAX_ATTRIBS); + assert(count <= SI_MAX_ATTRIBS); if (!v) return NULL; commit 389d5bbc98f04740bb64ec2e9c94dcab7adcbd6b Author: Brian Paul <bri...@vmware.com> Date: Mon Apr 11 18:54:28 2016 -0600 st/mesa: fix memleak in glDrawPixels cache code If the glDrawPixels size changed, we leaked the previously cached texture, if there was one. This patch fixes the reference counting, adds a refcount assertion check, and better handles potential malloc() failures. Tested with a modified version of the drawpix Mesa demo which changed the image size for each glDrawPixels call. Cc: "11.2" <mesa-sta...@lists.freedesktop.org> Reviewed-by: José Fonseca <jfons...@vmware.com> Reviewed-by: Charmaine Lee <charmai...@vmware.com> (cherry picked from commit 6c014782138634d5d36e1484bf498cef2b2d888f) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 51d4ae5..4f39ed7 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -374,7 +374,7 @@ make_texture(struct st_context *st, struct gl_context *ctx = st->ctx; struct pipe_context *pipe = st->pipe; mesa_format mformat; - struct pipe_resource *pt; + struct pipe_resource *pt = NULL; enum pipe_format pipeFormat; GLenum baseInternalFormat; @@ -393,10 +393,18 @@ make_texture(struct st_context *st, unpack->SkipRows == 0 && unpack->SwapBytes == GL_FALSE && st->drawpix_cache.image) { + assert(st->drawpix_cache.texture); + /* check if the pixel data is the same */ if (memcmp(pixels, st->drawpix_cache.image, width * height * bpp) == 0) { /* OK, re-use the cached texture */ - return st->drawpix_cache.texture; + pipe_resource_reference(&pt, st->drawpix_cache.texture); + /* refcount of returned texture should be at least two here. One + * reference for the cache to hold on to, one for the caller (which + * it will release), and possibly more held by the driver. + */ + assert(pt->reference.count >= 2); + return pt; } } @@ -515,8 +523,14 @@ make_texture(struct st_context *st, st->drawpix_cache.image = malloc(width * height * bpp); if (st->drawpix_cache.image) { memcpy(st->drawpix_cache.image, pixels, width * height * bpp); + pipe_resource_reference(&st->drawpix_cache.texture, pt); + } + else { + /* out of memory, free/disable cached texture */ + st->drawpix_cache.width = 0; + st->drawpix_cache.height = 0; + pipe_resource_reference(&st->drawpix_cache.texture, NULL); } - st->drawpix_cache.texture = pt; } #endif @@ -1150,9 +1164,8 @@ st_DrawPixels(struct gl_context *ctx, GLint x, GLint y, if (num_sampler_view > 1) pipe_sampler_view_reference(&sv[1], NULL); -#if !USE_DRAWPIXELS_CACHE + /* free the texture (but may persist in the cache) */ pipe_resource_reference(&pt, NULL); -#endif } commit 3840e67bf5a5c89fc644816e721d4527557ab3ef Author: Emil Velikov <emil.veli...@collabora.com> Date: Tue Apr 5 14:29:41 2016 +0100 configure.ac: update the path of the generated files ... in order to determine if we need bison/flex. Failing to locate the files will lead to mandating bison/flex even when building from a release tarball. CC: "11.2" <mesa-sta...@lists.freedesktop.org> Signed-off-by: Emil Velikov <emil.veli...@collabora.com> Acked-by: Jason Ekstrand <ja...@jlekstrand.net> Reviewed-by: Matt Turner <matts...@gmail.com> (cherry picked from commit c481c8f7f135d4cf17e35bb5126bdcf6b5611940) diff --git a/configure.ac b/configure.ac index d4ce99a..8109ac4 100644 --- a/configure.ac +++ b/configure.ac @@ -110,10 +110,10 @@ LT_INIT([disable-static]) AC_CHECK_PROG(RM, rm, [rm -f]) AX_PROG_BISON([], - AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"], + AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-parse.c"], [AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])])) AX_PROG_FLEX([], - AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-lex.c"], + AS_IF([test ! -f "$srcdir/src/compiler/glsl/glcpp/glcpp-lex.c"], [AC_MSG_ERROR([flex not found - unable to compile glcpp-lex.l])])) AC_CHECK_PROG(INDENT, indent, indent, cat) commit 5b644f3a06da76cdd6d404c98696e6d78c39c0cc Author: Ilia Mirkin <imir...@alum.mit.edu> Date: Sat Apr 9 13:11:42 2016 -0400 glsl: allow usage of the keyword buffer before GLSL 430 / ESSL 310 The GLSL 4.20 and ESSL 3.00 specs don't list 'buffer' as a reserved keyword. Make the parser ignore it unless GLSL 4.30 / ESSL 3.10 are used, or ARB_shader_storage_buffer_objects is enabled. Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> Reviewed-by: Timothy Arceri <timothy.arc...@collabora.com> Cc: mesa-sta...@lists.freedesktop.org (cherry picked from commit 9b5bd20eb2d09e1ec2319b55c83ad7f28b6fefee) diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index 9704fc7..50e862a 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -304,7 +304,7 @@ in return IN_TOK; out return OUT_TOK; inout return INOUT_TOK; uniform return UNIFORM; -buffer return BUFFER; +buffer KEYWORD_WITH_ALT(0, 0, 430, 310, yyextra->ARB_shader_storage_buffer_object_enable, BUFFER); varying DEPRECATED_ES_KEYWORD(VARYING); centroid KEYWORD(120, 300, 120, 300, CENTROID); invariant KEYWORD(120, 100, 120, 100, INVARIANT); commit bbbcad4f0d915c31f25b1beee19cea216ced4796 Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Thu Apr 7 11:21:19 2016 -0700 i965/tiled_memcpy: Rework the RGBA -> BGRA mem_copy functions This splits the two copy functions into three: One for unaligned copies, one for aligned sources, and one for aligned destinations. Thanks to the previous commit, we are now guaranteed that the aligned ones will *only* operate on aligned memory so they should be safe. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=93962 Cc: "11.1 11.2" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Matt Turner <matts...@gmail.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> Reviewed-by: Chad Versace <chad.vers...@intel.com> (cherry picked from commit d2b32656e18607f5807b3f4d4dde02568370b9bf) diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 89de594..8082b9d 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -56,21 +56,32 @@ static const uint32_t ytile_width = 128; static const uint32_t ytile_height = 32; static const uint32_t ytile_span = 16; +/** + * Copy RGBA to BGRA - swap R and B. + */ +static inline void * +rgba8_copy(void *dst, const void *src, size_t bytes) +{ + uint8_t *d = dst; + uint8_t const *s = src; + + assert(bytes % 4 == 0); + + while (bytes >= 4) { + d[0] = s[2]; + d[1] = s[1]; + d[2] = s[0]; + d[3] = s[3]; + d += 4; + s += 4; + bytes -= 4; + } + return dst; +} + #ifdef __SSSE3__ static const uint8_t rgba8_permutation[16] = { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 }; - -/* NOTE: dst must be 16-byte aligned. src may be unaligned. */ -#define rgba8_copy_16_aligned_dst(dst, src) \ - _mm_store_si128((__m128i *)(dst), \ - _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \ - *(__m128i *) rgba8_permutation)) - -/* NOTE: src must be 16-byte aligned. dst may be unaligned. */ -#define rgba8_copy_16_aligned_src(dst, src) \ - _mm_storeu_si128((__m128i *)(dst), \ - _mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \ - *(__m128i *) rgba8_permutation)) #endif /** @@ -82,32 +93,21 @@ rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes) uint8_t *d = dst; uint8_t const *s = src; -#ifdef __SSSE3__ - if (bytes == 16) { - assert(!(((uintptr_t)dst) & 0xf)); - rgba8_copy_16_aligned_dst(d+ 0, s+ 0); - return dst; - } + assert(bytes == 0 || !(((uintptr_t)dst) & 0xf)); - if (bytes == 64) { - assert(!(((uintptr_t)dst) & 0xf)); - rgba8_copy_16_aligned_dst(d+ 0, s+ 0); - rgba8_copy_16_aligned_dst(d+16, s+16); - rgba8_copy_16_aligned_dst(d+32, s+32); - rgba8_copy_16_aligned_dst(d+48, s+48); - return dst; +#ifdef __SSSE3__ + while (bytes >= 16) { + _mm_store_si128((__m128i *)d, + _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)s), + *(__m128i *) rgba8_permutation)); + s += 16; + d += 16; + bytes -= 16; } #endif - while (bytes >= 4) { - d[0] = s[2]; - d[1] = s[1]; - d[2] = s[0]; - d[3] = s[3]; - d += 4; - s += 4; - bytes -= 4; - } + rgba8_copy(d, s, bytes); + return dst; } @@ -120,32 +120,21 @@ rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes) uint8_t *d = dst; uint8_t const *s = src; -#ifdef __SSSE3__ - if (bytes == 16) { - assert(!(((uintptr_t)src) & 0xf)); - rgba8_copy_16_aligned_src(d+ 0, s+ 0); - return dst; - } + assert(bytes == 0 || !(((uintptr_t)src) & 0xf)); - if (bytes == 64) { - assert(!(((uintptr_t)src) & 0xf)); - rgba8_copy_16_aligned_src(d+ 0, s+ 0); - rgba8_copy_16_aligned_src(d+16, s+16); - rgba8_copy_16_aligned_src(d+32, s+32); - rgba8_copy_16_aligned_src(d+48, s+48); - return dst; +#ifdef __SSSE3__ + while (bytes >= 16) { + _mm_storeu_si128((__m128i *)d, + _mm_shuffle_epi8(_mm_load_si128((__m128i *)s), + *(__m128i *) rgba8_permutation)); + s += 16; + d += 16; + bytes -= 16; } #endif - while (bytes >= 4) { - d[0] = s[2]; - d[1] = s[1]; - d[2] = s[0]; - d[3] = s[3]; - d += 4; - s += 4; - bytes -= 4; - } + rgba8_copy(d, s, bytes); + return dst; } @@ -404,10 +393,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (mem_copy == memcpy) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + else if (mem_copy == rgba8_copy) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { @@ -415,10 +404,10 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, return linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + else if (mem_copy == rgba8_copy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } @@ -447,20 +436,20 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (mem_copy == memcpy) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + else if (mem_copy == rgba8_copy) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_dst) + else if (mem_copy == rgba8_copy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); + rgba8_copy, rgba8_copy_aligned_dst); else unreachable("not reached"); } @@ -489,20 +478,20 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (mem_copy == memcpy) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + else if (mem_copy == rgba8_copy) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src, rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + else if (mem_copy == rgba8_copy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src, rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } @@ -531,20 +520,20 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (mem_copy == memcpy) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + else if (mem_copy == rgba8_copy) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src, rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); - else if (mem_copy == rgba8_copy_aligned_src) + else if (mem_copy == rgba8_copy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src, rgba8_copy_aligned_src); + rgba8_copy, rgba8_copy_aligned_src); else unreachable("not reached"); } @@ -773,8 +762,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, if (format == GL_BGRA) { *mem_copy = memcpy; } else if (format == GL_RGBA) { - *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst - : rgba8_copy_aligned_src; + *mem_copy = rgba8_copy; } } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) || (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM)) { @@ -783,8 +771,7 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format, /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can * use the same function. */ - *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst - : rgba8_copy_aligned_src; + *mem_copy = rgba8_copy; } else if (format == GL_RGBA) { *mem_copy = memcpy; } commit b7bd6944c8ccfe2b6c0b1c68e16980fcac5cf7f0 Author: Jason Ekstrand <jason.ekstr...@intel.com> Date: Thu Apr 7 10:52:28 2016 -0700 i965/tiled_memcopy: Add aligned mem_copy parameters to the [de]tiling functions Each of the [de]tiling functions has three mem_copy calls: 1) Left edge to tile boundary 2) Tile boundary to tile boundary in a loop 3) Tile boundary to right edge Copies 2 and 3 start at a tile edge so the pointer to tiled memory is guaranteed to be at least 16-byte aligned. Copy 1, on the other hand, starts at some arbitrary place in the tile so it doesn't have any such alignment guarantees. Cc: "11.1 11.2" <mesa-sta...@lists.freedesktop.org> Reviewed-by: Matt Turner <matts...@gmail.com> Reviewed-by: Roland Scheidegger <srol...@vmware.com> Reviewed-by: Chad Versace <chad.vers...@intel.com> (cherry picked from commit f6f54a29ca9bc8c1a1a994ff4e3ee09772de78e4) diff --git a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c index 2383401..89de594 100644 --- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c +++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c @@ -172,6 +172,12 @@ typedef void (*tile_copy_fn)(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * Copy texture data from linear to X tile layout. * * \copydoc tile_copy_fn + * + * The mem_copy parameters allow the user to specify an alternative mem_copy + * function that, for instance, may do RGBA -> BGRA swizzling. The first + * function must handle any memory alignment while the second function must + * only handle 16-byte alignment in whichever side (source or destination) is + * tiled. */ static inline void linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, @@ -179,7 +185,8 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t src_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* The copy destination offset for each range copied is the sum of * an X offset 'x0' or 'xo' and a Y offset 'yo.' @@ -200,10 +207,10 @@ linear_to_xtiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, mem_copy(dst + ((x0 + yo) ^ swizzle), src + x0, x1 - x0); for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + xo, xtile_span); } - mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); src += src_pitch; } @@ -220,7 +227,8 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t src_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* Y tiles consist of columns that are 'ytile_span' wide (and the same height * as the tile). Thus the destination offset for (x,y) is the sum of: @@ -259,12 +267,12 @@ linear_to_ytiled(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * at each step so we don't need to calculate it explicitly. */ for (x = x1; x < x2; x += ytile_span) { - mem_copy(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x, ytile_span); xo += bytes_per_column; swizzle ^= swizzle_bit; } - mem_copy(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); + mem_copy_align16(dst + ((xo + yo) ^ swizzle), src + x2, x3 - x2); src += src_pitch; } @@ -281,7 +289,8 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t dst_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* The copy destination offset for each range copied is the sum of * an X offset 'x0' or 'xo' and a Y offset 'yo.' @@ -302,10 +311,10 @@ xtiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, mem_copy(dst + x0, src + ((x0 + yo) ^ swizzle), x1 - x0); for (xo = x1; xo < x2; xo += xtile_span) { - mem_copy(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); + mem_copy_align16(dst + xo, src + ((xo + yo) ^ swizzle), xtile_span); } - mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); dst += dst_pitch; } @@ -322,7 +331,8 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, char *dst, const char *src, int32_t dst_pitch, uint32_t swizzle_bit, - mem_copy_fn mem_copy) + mem_copy_fn mem_copy, + mem_copy_fn mem_copy_align16) { /* Y tiles consist of columns that are 'ytile_span' wide (and the same height * as the tile). Thus the destination offset for (x,y) is the sum of: @@ -361,12 +371,12 @@ ytiled_to_linear(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, * at each step so we don't need to calculate it explicitly. */ for (x = x1; x < x2; x += ytile_span) { - mem_copy(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); + mem_copy_align16(dst + x, src + ((xo + yo) ^ swizzle), ytile_span); xo += bytes_per_column; swizzle ^= swizzle_bit; } - mem_copy(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); + mem_copy_align16(dst + x2, src + ((xo + yo) ^ swizzle), x3 - x2); dst += dst_pitch; } @@ -393,26 +403,27 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { if (mem_copy == memcpy) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, src_pitch, swizzle_bit, memcpy); + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_dst) return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy); + dst, src, src_pitch, swizzle_bit, + memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_dst) return linear_to_xtiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); else unreachable("not reached"); } linear_to_xtiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy); + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -435,26 +446,26 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { if (mem_copy == memcpy) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, src_pitch, swizzle_bit, memcpy); + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_dst) return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); else unreachable("not reached"); } else { if (mem_copy == memcpy) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, memcpy); + dst, src, src_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_dst) return linear_to_ytiled(x0, x1, x2, x3, y0, y1, dst, src, src_pitch, swizzle_bit, - rgba8_copy_aligned_dst); + rgba8_copy_aligned_dst, rgba8_copy_aligned_dst); else unreachable("not reached"); } linear_to_ytiled(x0, x1, x2, x3, y0, y1, - dst, src, src_pitch, swizzle_bit, mem_copy); + dst, src, src_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -477,26 +488,26 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == xtile_width && y0 == 0 && y1 == xtile_height) { if (mem_copy == memcpy) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, - dst, src, dst_pitch, swizzle_bit, memcpy); + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_src) return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy_aligned_src, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy); + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_src) return xtiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy_aligned_src, rgba8_copy_aligned_src); else unreachable("not reached"); } xtiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, mem_copy); + dst, src, dst_pitch, swizzle_bit, mem_copy, mem_copy); } /** @@ -519,26 +530,26 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, if (x0 == 0 && x3 == ytile_width && y0 == 0 && y1 == ytile_height) { if (mem_copy == memcpy) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, - dst, src, dst_pitch, swizzle_bit, memcpy); + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_src) return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy_aligned_src, rgba8_copy_aligned_src); else unreachable("not reached"); } else { if (mem_copy == memcpy) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, - dst, src, dst_pitch, swizzle_bit, memcpy); + dst, src, dst_pitch, swizzle_bit, memcpy, memcpy); else if (mem_copy == rgba8_copy_aligned_src) return ytiled_to_linear(x0, x1, x2, x3, y0, y1, dst, src, dst_pitch, swizzle_bit, - rgba8_copy_aligned_src); + rgba8_copy_aligned_src, rgba8_copy_aligned_src); else unreachable("not reached"); }