Mesa (master): glsl: allow invariant on fragment shader outputs.
Module: Mesa Branch: master Commit: ab8ea1b3d481ff39fbfc2b75a63b24838aaf7476 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab8ea1b3d481ff39fbfc2b75a63b24838aaf7476 Author: Dave Airlie Date: Mon May 23 14:18:03 2016 +1000 glsl: allow invariant on fragment shader outputs. From page 27 (page 33 of the PDF) of the GLSL 1.20 spec: " Only variables output from a vertex shader can be candidates for invariance." But this later changes to: From page 37 (page 43 of the PDF) of the GLSL 1.30 spec: " Only variables output from a shader can be candidates for invariance." We can also find: From page 37 (page 43 of the PDF) of the GLSL 1.30 spec: " Initially, by default, all output variables are allowed to be variant. To force all output variables to be invariant, use the pragma #pragma STDGL invariant(all) before all declarations in a shader. If this pragma is used after the declaration of any variables or functions, then the set of outputs that behave as invariant is undefined. It is an error to use this pragma in a fragment shader." But this needs to be corrected and it is being addressed at: https://cvs.khronos.org/bugzilla/show_bug.cgi?id=16140 Fixes GL45-CTS.shading_language_420pack.qualifier_order. Signed-off-by: Dave Airlie Reviewed-by: Andres Gomez --- src/compiler/glsl/ast_to_hir.cpp | 26 -- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 1e14d27..9c63386 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -2634,6 +2634,28 @@ is_varying_var(ir_variable *var, gl_shader_stage target) } } +static bool +is_allowed_invariant(ir_variable *var, struct _mesa_glsl_parse_state *state) +{ + if (is_varying_var(var, state->stage)) + return true; + + /* From Section 4.6.1 ("The Invariant Qualifier") GLSL 1.20 spec: +* "Only variables output from a vertex shader can be candidates +* for invariance". 
+*/ + if (!state->is_version(130, 0)) + return false; + + /* +* Later specs remove this language - so allowed invariant +* on fragment shader outputs as well. +*/ + if (state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_out) + return true; + return false; +} /** * Matrix layout qualifiers are only allowed on certain types @@ -4495,7 +4517,7 @@ ast_declarator_list::hir(exec_list *instructions, _mesa_glsl_error(& loc, state, "undeclared variable `%s' cannot be marked " "invariant", decl->identifier); - } else if (!is_varying_var(earlier, state->stage)) { + } else if (!is_allowed_invariant(earlier, state)) { _mesa_glsl_error(&loc, state, "`%s' cannot be marked invariant; interfaces between " "shader stages only.", decl->identifier); @@ -4791,7 +4813,7 @@ ast_declarator_list::hir(exec_list *instructions, } if (this->type->qualifier.flags.q.invariant) { - if (!is_varying_var(var, state->stage)) { + if (!is_allowed_invariant(var, state)) { _mesa_glsl_error(&loc, state, "`%s' cannot be marked invariant; interfaces between " "shader stages only", var->name); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Don't bail on vertex element processing if we need draw params.
Module: Mesa Branch: master Commit: 8fc5443a2b39aaa8292984f7225a2c7968d568ae URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8fc5443a2b39aaa8292984f7225a2c7968d568ae Author: Kenneth Graunke Date: Mon Dec 19 11:21:27 2016 -0800 i965: Don't bail on vertex element processing if we need draw params. BaseVertex, BaseInstance, DrawID, and some edge flag conditions need vertex buffer and elements structs. We can't bail early in this case. Gen4-7 already do this properly. Gen8+ did not. Thanks to Ilia Mirkin for helping track this down. Cc: mesa-sta...@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99144 Reported-by: Pierre-Eric Pelloux-Prayer Signed-off-by: Kenneth Graunke Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/gen8_draw_upload.c | 34 ++-- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen8_draw_upload.c b/src/mesa/drivers/dri/i965/gen8_draw_upload.c index 69ba8e9..3177f9a 100644 --- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c +++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c @@ -110,6 +110,22 @@ gen8_emit_vertices(struct brw_context *brw) ADVANCE_BATCH(); } + /* Normally we don't need an element for the SGVS attribute because the +* 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an +* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if +* we're using draw parameters then we need an element for the those +* values. Additionally if there is an edge flag element then the SGVS +* can't be inserted past that so we need a dummy element to ensure that +* the edge flag is the last one. 
+*/ + const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || +vs_prog_data->uses_baseinstance || +((vs_prog_data->uses_instanceid || + vs_prog_data->uses_vertexid) && + uses_edge_flag)); + const unsigned nr_elements = + brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; + /* If the VS doesn't read any inputs (calculating vertex position from * a state variable for some reason, for example), emit a single pad * VERTEX_ELEMENT struct and bail. @@ -117,7 +133,7 @@ gen8_emit_vertices(struct brw_context *brw) * The stale VB state stays in place, but they don't do anything unless * a VE loads from them. */ - if (brw->vb.nr_enabled == 0) { + if (nr_elements == 0) { BEGIN_BATCH(3); OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2)); OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) | @@ -172,22 +188,6 @@ gen8_emit_vertices(struct brw_context *brw) ADVANCE_BATCH(); } - /* Normally we don't need an element for the SGVS attribute because the -* 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an -* element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if -* we're using draw parameters then we need an element for the those -* values. Additionally if there is an edge flag element then the SGVS -* can't be inserted past that so we need a dummy element to ensure that -* the edge flag is the last one. -*/ - const bool needs_sgvs_element = (vs_prog_data->uses_basevertex || -vs_prog_data->uses_baseinstance || -((vs_prog_data->uses_instanceid || - vs_prog_data->uses_vertexid) && - uses_edge_flag)); - const unsigned nr_elements = - brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid; - /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS, * presumably for VertexID/InstanceID. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: don't attempt to unlock an unlocked debug state mutex
Module: Mesa Branch: master Commit: d74c3e55b3b44d8770827a2af5dfffa10557e3c7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d74c3e55b3b44d8770827a2af5dfffa10557e3c7 Author: Jonathan Gray Date: Mon Dec 12 02:42:16 2016 +1100 mesa: don't attempt to unlock an unlocked debug state mutex Commit 929fcee47e46781c57f2a354ce0a013915c033d1 introduced code that attempts to unlock an unlocked mutex which is undefined behaviour. On OpenBSD this leads to an abort: 0 0x124dadfa96ba in thrkill () at :2 1 0x124dadf3da39 in *_libc_abort () at /usr/src/lib/libc/stdlib/abort.c:52 2 0x124d2c1165b5 in *_libpthread_pthread_mutex_unlock (mutexp=) at /usr/src/lib/librthread/rthread_sync.c:221 3 0x124d279c02e4 in init_attrib_groups (ctx=0x124df0fda000) at main/context.c:825 4 _mesa_initialize_context (ctx=ctx@entry=0x124df0fda000, api=api@entry=API_OPENGL_CORE, visual=visual@entry=0x7f7bdfd0, share_list=share_list@entry=0x0, driverFunctions=driverFunctions@entry=0x7f7bda60) at main/context.c:1204 5 0x124d27b507ec in st_create_context (api=api@entry=API_OPENGL_CORE, pipe=pipe@entry=0x124dc491, visual=visual@entry=0x7f7bdfd0, share=share@entry=0x0, options=options@entry=0x7f7be128) at state_tracker/st_context.c:545 6 0x124d27b8639f in st_api_create_context (stapi=, smapi=0x124d1b608800, attribs=0x7f7be100, error=0x7f7be0fc, shared_stctxi=0x0) at state_tracker/st_manager.c:669 7 0x124d27cc5b9c in dri_create_context (api=, visual=0x124d8a0f8a00, cPriv=0x124de473f240, major_version=, minor_version=, flags=, notify_reset=false, error=0x7f7be2b4, sharedContextPrivate=0x0) at dri_context.c:123 8 0x124d27cc5029 in driCreateContextAttribs (screen=0x124d8a0f8400, api=, config=0x124d8a0f8a00, shared=, num_attribs=, attribs=, error=0x7f7be2b4, data=0x124d77814a00) at dri_util.c:448 9 0x124d8e109b00 in drisw_create_context_attribs (base=0x124df3e08700, config_base=0x124d7a0e7300, shareList=, num_attribs=, attribs=, error=0x7f7be2b4) at drisw_glx.c:476 10 0x124d8e104b4a in 
glXCreateContextAttribsARB (dpy=0x124d533f, config=0x124d7a0e7300, share_context=0x0, direct=1, attrib_list=0x7f7be300) at create_context.c:78 Signed-off-by: Jonathan Gray Reviewed-by: Eduardo Lima Mitev --- src/mesa/main/debug_output.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/debug_output.c b/src/mesa/main/debug_output.c index 48dbbb3..bc933db 100644 --- a/src/mesa/main/debug_output.c +++ b/src/mesa/main/debug_output.c @@ -1282,14 +1282,13 @@ _mesa_init_debug_output(struct gl_context *ctx) */ struct gl_debug_state *debug = _mesa_lock_debug_state(ctx); if (!debug) { - goto done; + return; } debug->DebugOutput = GL_TRUE; debug->LogToStderr = GL_TRUE; ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT; + _mesa_unlock_debug_state(ctx); } -done: - _mesa_unlock_debug_state(ctx); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Use ir_var_temporary when generating inline functions.
Module: Mesa Branch: master Commit: 62b8bcda1cb7ad18acee7042d40c01b56385e124 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62b8bcda1cb7ad18acee7042d40c01b56385e124 Author: Kenneth Graunke Date: Mon Dec 19 14:32:57 2016 -0800 glsl: Use ir_var_temporary when generating inline functions. We were using ir_var_auto for the inlined function parameter variables, which is wrong, as it suggests that those are real variables declared by the program. Normally this doesn't matter. However, if you called built-ins at global scope, it would pollute the global variable namespace with these new parameter temporaries. If the shader already had variables with those names, the linker might see contradictory global variable declarations and raise an error. Making them temporaries indicates that these are just things generated by the compiler internally. This avoids confusing the linker. Fixes a new Piglit test: glsl-fs-multiple-builtins. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99154 Reported-by: Niels Ole Salscheider Signed-off-by: Kenneth Graunke Reviewed-by: Ilia Mirkin Reviewed-by: Iago Toral Quiroga --- src/compiler/glsl/opt_function_inlining.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/opt_function_inlining.cpp b/src/compiler/glsl/opt_function_inlining.cpp index 62c1f4b..78a726b 100644 --- a/src/compiler/glsl/opt_function_inlining.cpp +++ b/src/compiler/glsl/opt_function_inlining.cpp @@ -164,7 +164,7 @@ ir_call::generate_inline(ir_instruction *next_ir) parameters[i] = NULL; } else { parameters[i] = sig_param->clone(ctx, ht); -parameters[i]->data.mode = ir_var_auto; +parameters[i]->data.mode = ir_var_temporary; /* Remove the read-only decoration because we're going to write * directly to this variable. If the cloned variable is left ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): egl: Check config's surface types in eglCreate*Surface()
Module: Mesa Branch: master Commit: fbb4af96c6b6be08ed93e8d5a704b9f7002642f8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fbb4af96c6b6be08ed93e8d5a704b9f7002642f8 Author: Chad Versace Date: Fri Dec 16 11:00:13 2016 -0800 egl: Check config's surface types in eglCreate*Surface() If the provided EGLConfig does not support the requested surface type, then emit EGL_BAD_MATCH. Fixes dEQP-EGL.functional.negative_api.create_pbuffer_surface on GBM. Cc: "13.0" Reviewed-by: Tapani Pälli --- src/egl/main/eglapi.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/egl/main/eglapi.c b/src/egl/main/eglapi.c index a349992..cab05c2 100644 --- a/src/egl/main/eglapi.c +++ b/src/egl/main/eglapi.c @@ -875,6 +875,9 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config, _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); + if ((conf->SurfaceType & EGL_WINDOW_BIT) == 0) + RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE); + surf = drv->API.CreateWindowSurface(drv, disp, conf, native_window, attrib_list); ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE; @@ -993,6 +996,10 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config, #endif _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); + + if ((conf->SurfaceType & EGL_PIXMAP_BIT) == 0) + RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE); + surf = drv->API.CreatePixmapSurface(drv, disp, conf, native_pixmap, attrib_list); ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE; @@ -1063,6 +1070,9 @@ eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config, _EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE); _EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv); + if ((conf->SurfaceType & EGL_PBUFFER_BIT) == 0) + RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE); + surf = drv->API.CreatePbufferSurface(drv, disp, conf, attrib_list); ret = (surf) ? 
_eglLinkSurface(surf) : EGL_NO_SURFACE; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): 99 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7b154ac04d9cacf98631a826c25e7e3756f5cda9 Author: Axel Davy Date: Sun Nov 13 12:41:53 2016 +0100 st/nine: Optimize ColorFill When we lock the whole surface to overwrite it, we can use DISCARD. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9bf1da05d9375f6faf4a3977c7674a1cda9ca0b0 Author: Axel Davy Date: Sun Nov 13 12:40:10 2016 +0100 st/nine: Simplify ColorFill For render targets, NineSurface9_GetSurface is not expected to fail. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=31262bbce085c69ef7a654528f509cb37415b41f Author: Axel Davy Date: Sat Nov 12 23:36:35 2016 +0100 st/nine: use get_pipe_acquire/release when possible Use the acquire/release semantic when we don't need to wait for any pending command. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=22f6d6fbd270333979c282627349eb74c06acd6b Author: Axel Davy Date: Thu Nov 3 23:37:46 2016 +0100 st/nine: Implement Fast path for dynamic buffers and csmt Use the secondary pipe for DISCARD/NOOVERWRITE, which avoids stalling to get the pipe from the worker thread. v2: flush at unmap. This is required for example if the driver does hidden draw calls or copies. In the case of unsynchronized it is probably not required, but it is more safe. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3e8234fff42022b12ff7bdbff052c75c71f5bedb Author: Axel Davy Date: Thu Nov 3 22:12:01 2016 +0100 st/nine: Add secondary pipe for device The secondary pipe will be used for operations that don't need synchronization. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a7eeefd7dd858682244e43eb5cb0d2c84fb0b07 Author: Axel Davy Date: Sat Nov 12 22:24:04 2016 +0100 st/nine: Add nine_context_get_pipe_acquire/release See commit for description. 
Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ddb6f1d2d138109b5716baf4d77683d5bf6cffc8 Author: Axel Davy Date: Tue Dec 13 01:10:15 2016 +0100 st/nine: SYSTEMMEM ignores DISCARD. Tests show SYSTEMMEM should ignore DISCARD. Prevents game bugs with following patches reimplementing DISCARD. Halo is affected. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4f344db8b0fdb08cdcba00d714309f27e073edf8 Author: Axel Davy Date: Sun Nov 6 12:38:38 2016 +0100 st/nine: Upload Managed buffers just before draw call using them Previously we were uploading Managed buffers at the next draw call after they were set dirty. This is not the expected behaviour. Instead upload just before draw call needing the content. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e52aded87febcf422ae80551e5d204be04c89278 Author: Axel Davy Date: Thu Nov 3 21:53:53 2016 +0100 st/nine: Track bindings for buffers Similar code than for textures. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=62068c9d90ceda8a4a6696dee1ec7f7a718b9d36 Author: Axel Davy Date: Sun Nov 6 12:06:22 2016 +0100 st/nine: Fix BASETEX_REGISTER_UPDATE BASETEX_REGISTER_UPDATE was adding the texture to the list of textures to upload in too many cases. tex->base.base.bind will be set to true if the texture is in a stateblock, whereas we want to upload only if bound to the device, which is what bind_count is for. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=804b28cdc461043bba18f1b2b914ef61b9773192 Author: Axel Davy Date: Sun Nov 6 12:05:50 2016 +0100 st/nine: Simplify the logic to bind textures This makes the code more readable. 
Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fef23f6712064416f02211f1517e387169735e0b Author: Patrick Rudolph Date: Wed Nov 2 17:17:21 2016 +0100 st/nine: Use nine_context for resource_copy_region Use nine_context wrapper for resource_copy_region. Enables to offload it with CSMT. Signed-off-by: Patrick Rudolph URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c8913a06b454bd0aca813d2ea362e7d06840216f Author: Patrick Rudolph Date: Thu Oct 27 07:39:03 2016 +0200 st/nine: Use nine_context for blit Enables to offload it with CSMT. Signed-off-by: Patrick Rudolph URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fd57306134b9f4e57e544ecc11971a94d7a90df Author: Patrick Rudolph Date: Fri Oct 28 18:04:10 2016 +0200 st/nine: Add NINE_DEBUG=tid to turn threadid on or off To ease debugging. Signed-o
Mesa (master): 26 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=123e947228241e743a4d0fdb8a911f57a3dd4ef9 Author: Axel Davy Date: Sun Dec 18 19:21:08 2016 +0100 st/nine: Upload on secondary context for Draw*Up Avoid synchronization by using the secondary context for uploading the vertex data for Draw*Up. v2: Rely on u_upload_mgr to use persistent coherent buffers. Do not flush. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ec4e5f630ed68ece3f176b174cfd66eff023904 Author: Axel Davy Date: Tue Dec 13 01:16:21 2016 +0100 st/nine: Dirty MANAGED buffers at Lock time Tests suggest MANAGED buffers are made dirty at Lock time, not at Unlock time. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bad7f7cc638391acc93f070fce3e1b7035dc48f8 Author: Axel Davy Date: Sun Dec 4 20:34:59 2016 +0100 st/nine: Implement new buffer upload path This new buffer upload path enables to lock faster than the normal path when using DISCARD/NOOVERWRITE. v2: Diverse cleanups and fixes. v3: Fix allocation size for 'lone' buffers and add more debug info. v4: Rewrite of the path to handle when DISCARD/NOOVERWRITE is not used anymore. The resource content is copied to the new resource used. v5: flush for safety after unmap (not sure it is really required here, but safer to flush). v6: Do not use the path if persistent coherent mapping is unavailable. Fix buffer creation flags. v7: Do not flush since it is not needed. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8960be0e939bc6c0f9f34c5d4a16891af7e325fd Author: Axel Davy Date: Sun Dec 4 19:23:11 2016 +0100 st/nine: Allow non-zero resource offset for vertex buffers Next patches will introduce an offset. 
Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1e64be6f91652e7e5ba60c86c379ee41bb7e4f81 Author: Axel Davy Date: Sun Dec 4 10:34:34 2016 +0100 st/nine: Do not wait for DEFAULT lock for volumes when we can If the volumes (and the texture container) are not referenced, then they are no pending operations on them. We can lock directly. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b4f16615ef6546c6f8eab8a2d2a50cdab1773a91 Author: Axel Davy Date: Sun Dec 4 10:33:19 2016 +0100 st/nine: Do not wait for DEFAULT lock for surfaces when we can If the surfaces (and the texture container) are not referenced, then they are no pending operations on them. We can lock directly. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=525a1b292a3d65857db06832e175fba840b75ff8 Author: Axel Davy Date: Sun Dec 4 10:32:01 2016 +0100 st/nine: Add arguments to context's blit and copy_region The new arguments enable to reference the objects while the function hasn't run. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=325324c749c2331162e7889efc335bfcfcd27005 Author: Axel Davy Date: Sun Dec 4 01:13:25 2016 +0100 st/nine: Idem for nine_context_gen_mipmap Will enable to use the bind count as an information for whether the surface/volume is used in the worker thread. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7089d88199d607d8a135a9605f14ec6393134205 Author: Axel Davy Date: Sun Dec 4 01:10:34 2016 +0100 st/nine: Bind destination for surface/volume uploads Will enable to use the bind count as an information for whether the surface/volume is used in the worker thread. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4a9b21feb890c149f0aa44b0bb6ea63a9932925 Author: Axel Davy Date: Sun Dec 4 00:29:16 2016 +0100 st/nine: Use nine_context_box_upload for volumes Use nine_context_box_upload for uploads: . 
systemmem volume to default volume . managed volume internal content to its resource. Check the uploads are executed before any action that can alter the data, that is LockBox and volume destruction. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f04263923132bcf0425e1297da460543e93a95fe Author: Axel Davy Date: Mon Dec 5 00:12:52 2016 +0100 st/nine: Fix leak with volume dtor The last level was not released. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=76e392d85239718e6a7b20c914b4827db2828b40 Author: Axel Davy Date: Mon Dec 5 00:12:07 2016 +0100 st/nine: Fix leak with cubetexture dtor The last level was not released. Signed-off-by: Axel Davy URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fec0b7f0672
Mesa (master): gallivm: (trivial) handle non-aligned fetch for lp_build_fetch_rgba_soa
Module: Mesa Branch: master Commit: d7d23aee4b41de64781fe249ca9a99a9166b097d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7d23aee4b41de64781fe249ca9a99a9166b097d Author: Roland Scheidegger Date: Sun Dec 11 23:37:30 2016 +0100 gallivm: (trivial) handle non-aligned fetch for lp_build_fetch_rgba_soa soa fetch so far always assumed that data was aligned. However, we want to use this for vertex fetch, and data might not be aligned there, so handle it in this path too (basically just pass through alignment through to other functions). (It looks like it wouldn't work for for cached s3tc but this is no different than with AoS fetch.) Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_format.h | 1 + src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 15 +-- src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format.h b/src/gallium/auxiliary/gallivm/lp_bld_format.h index 5c866f4..6540caa 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h @@ -143,6 +143,7 @@ void lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, +boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offsets, LLVMValueRef i, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 7444c51..389bfa0 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -349,6 +349,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm, * * \param type the desired return type for 'rgba'. The vector length * is the number of texels to fetch + * \param aligned if the offset is guaranteed to be aligned to element width * * \param base_ptr points to the base of the texture mip tree. 
* \param offsetoffset to start of the texture image block. For non- @@ -365,6 +366,7 @@ void lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type type, +boolean aligned, LLVMValueRef base_ptr, LLVMValueRef offset, LLVMValueRef i, @@ -402,7 +404,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, type.length, format_desc->block.bits, type.width, - TRUE, + aligned, base_ptr, offset, FALSE); /* @@ -428,7 +430,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, packed = lp_build_gather(gallivm, type.length, format_desc->block.bits, - type.width, TRUE, + type.width, aligned, base_ptr, offset, FALSE); if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) { lp_build_r11g11b10_to_float(gallivm, packed, rgba_out); @@ -456,14 +458,14 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, LLVMValueRef s_offset = lp_build_const_int_vec(gallivm, type, 4); offset = LLVMBuildAdd(builder, offset, s_offset, ""); packed = lp_build_gather(gallivm, type.length, 32, type.width, - TRUE, base_ptr, offset, FALSE); + aligned, base_ptr, offset, FALSE); packed = LLVMBuildAnd(builder, packed, lp_build_const_int_vec(gallivm, type, mask), ""); } else { assert (format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); packed = lp_build_gather(gallivm, type.length, 32, type.width, - TRUE, base_ptr, offset, TRUE); + aligned, base_ptr, offset, TRUE); packed = LLVMBuildBitCast(builder, packed, lp_build_vec_type(gallivm, type), ""); } @@ -489,7 +491,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, tmp_type.norm = TRUE; tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, -TRUE, base_ptr, offset, i, j, cache); +aligned, base_ptr, offset, i, j, cache); lp_build_rgba8_to_fi32_soa(gallivm, type, @@ -509,6 +511,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, const struct util_format_descripti
Mesa (master): draw: use SoA fetch, not AoS one
Module: Mesa Branch: master Commit: e827d9175675aaa6cfc0b981e2a80685fb7b3a74 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e827d9175675aaa6cfc0b981e2a80685fb7b3a74 Author: Roland Scheidegger Date: Wed Dec 21 04:43:07 2016 +0100 draw: use SoA fetch, not AoS one Now that there's some SoA fetch which never falls back, we should always get results which are better or at least not worse (something like rgba32f will stay the same). For cases which get way better, think something like R16_UNORM with 8-wide vectors: this was 8 sign-extend fetches, 8 cvt, 8 muls, followed by a couple of shuffles to stitch things together (if it is smart enough, 6 unpacks) and then a (8-wide) transpose (not sure if llvm could even optimize the shuffles + transpose, since the 16bit values were actually sign-extended to 128bit before being cast to a float vec, so that would be another 8 unpacks). Now that is just 8 fetches (directly inserted into vector, albeit there's one 128bit insert needed), 1 cvt, 1 mul. v2: ditch the old AoS code instead of just disabling it. 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_llvm.c | 71 +++--- 1 file changed, 23 insertions(+), 48 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 19b75a5..8952dc8 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -713,39 +713,6 @@ fetch_instanced(struct gallivm_state *gallivm, static void -convert_to_soa(struct gallivm_state *gallivm, - LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], - LLVMValueRef dst_soa[TGSI_NUM_CHANNELS], - const struct lp_type soa_type) -{ - unsigned j, k; - struct lp_type aos_channel_type = soa_type; - - LLVMValueRef aos_channels[TGSI_NUM_CHANNELS]; - unsigned pixels_per_channel = soa_type.length / TGSI_NUM_CHANNELS; - - debug_assert(TGSI_NUM_CHANNELS == 4); - debug_assert((soa_type.length % TGSI_NUM_CHANNELS) == 0); - - aos_channel_type.length >>= 1; - - for (j = 0; j < TGSI_NUM_CHANNELS; ++j) { - LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; - - assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); - - for (k = 0; k < pixels_per_channel; ++k) { - channel[k] = src_aos[j + TGSI_NUM_CHANNELS * k]; - } - - aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); - } - - lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa); -} - - -static void fetch_vector(struct gallivm_state *gallivm, const struct util_format_description *format_desc, struct lp_type vs_type, @@ -755,11 +722,10 @@ fetch_vector(struct gallivm_state *gallivm, LLVMValueRef *inputs, LLVMValueRef indices) { - LLVMValueRef zero = LLVMConstNull(LLVMInt32TypeInContext(gallivm->context)); LLVMBuilderRef builder = gallivm->builder; struct lp_build_context blduivec; + struct lp_type fetch_type = vs_type; LLVMValueRef offset, valid_mask; - LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; unsigned i; lp_build_context_init(&blduivec, gallivm, lp_uint_type(vs_type)); @@ -783,28 +749,37 @@ fetch_vector(struct 
gallivm_state *gallivm, } /* -* Note: we probably really want to use SoA fetch, not AoS one (albeit -* for most formats it will amount to the same as this isn't very -* optimized). But looks dangerous since it assumes alignment. +* Unlike fetch_instanced, use SoA fetch instead of multiple AoS fetches. +* This should always produce better code. */ - for (i = 0; i < vs_type.length; i++) { - LLVMValueRef offset1, elem; - elem = lp_build_const_int32(gallivm, i); - offset1 = LLVMBuildExtractElement(builder, offset, elem, ""); - aos_fetch[i] = lp_build_fetch_rgba_aos(gallivm, format_desc, - lp_float32_vec4_type(), - FALSE, map_ptr, offset1, - zero, zero, NULL); + /* The type handling is annoying here... */ + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && + format_desc->channel[0].pure_integer) { + if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { + fetch_type = lp_type_int_vec(vs_type.width, vs_type.width * vs_type.length); + } + else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { + fetch_type = lp_type_uint_vec(vs_type.width, vs_type.width * vs_type.length); + } } - convert_to_soa(gallivm, aos_fetch, inputs, vs_type); + + lp_build_fetch_rgba_soa(gallivm, format_desc, + fetch_type, FALSE, map_ptr, offset, + blduivec.zero, blduivec.zero, + NULL, inputs); for (i = 0; i < TGSI_NUM_CHANNELS; i++) { + inputs[i] = L
Mesa (master): gallivm: generalize the compressed format soa fetch a bit
Module: Mesa Branch: master Commit: cb81460dcc61da0fb5ce066ee435c56840c0aba3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb81460dcc61da0fb5ce066ee435c56840c0aba3 Author: Roland Scheidegger Date: Wed Dec 21 04:30:02 2016 +0100 gallivm: generalize the compressed format soa fetch a bit This can now handle rgtc (unorm) too - this path no longer handles plain formats, but that's unnecessary they now all have their proper SoA unpack (this will still be dog-slow though due to the actual fetch being per-pixel util fallbacks). Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 86 +-- 1 file changed, 49 insertions(+), 37 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 34da869..cd17040 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -733,64 +733,69 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, /* * Try calling lp_build_fetch_rgba_aos for all pixels. +* Should only really hit subsampled, compressed +* (for s3tc srgb too, for rgtc the unorm ones only) by now. +* (This is invalid for plain 8unorm formats because we're lazy with +* the swizzle since some results would arrive swizzled, some not.) 
*/ - if (util_format_fits_8unorm(format_desc) && + if ((format_desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) && + (util_format_fits_8unorm(format_desc) || +format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) && type.floating && type.width == 32 && (type.length == 1 || (type.length % 4 == 0))) { struct lp_type tmp_type; - LLVMValueRef tmp; + struct lp_build_context bld; + LLVMValueRef packed, rgba[4]; + const struct util_format_description *flinear_desc; + const struct util_format_description *frgba8_desc; + unsigned chan; + lp_build_context_init(&bld, gallivm, type); + + /* + * Make sure the conversion in aos really only does convert to rgba8 + * and not anything more (so use linear format, adjust type). + */ + flinear_desc = util_format_description(util_format_linear(format)); memset(&tmp_type, 0, sizeof tmp_type); tmp_type.width = 8; tmp_type.length = type.length * 4; tmp_type.norm = TRUE; - tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, -aligned, base_ptr, offset, i, j, cache); + packed = lp_build_fetch_rgba_aos(gallivm, flinear_desc, tmp_type, + aligned, base_ptr, offset, i, j, cache); + packed = LLVMBuildBitCast(builder, packed, bld.int_vec_type, ""); - lp_build_rgba8_to_fi32_soa(gallivm, -type, -tmp, -rgba_out); - - return; - } - - if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && - /* non-srgb case is already handled above */ - format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && - type.floating && type.width == 32 && - (type.length == 1 || (type.length % 4 == 0)) && - cache) { - const struct util_format_description *format_decompressed; - const struct util_format_description *flinear_desc; - LLVMValueRef packed; - flinear_desc = util_format_description(util_format_linear(format_desc->format)); - /* This probably only works with aligned data */ - packed = lp_build_fetch_cached_texels(gallivm, -flinear_desc, -type.length, -base_ptr, -offset, -i, j, -cache); - packed = LLVMBuildBitCast(builder, packed, -lp_build_int_vec_type(gallivm, type), 
""); /* - * The values are now packed so they match ordinary srgb RGBA8 format, + * The values are now packed so they match ordinary (srgb) RGBA8 format, * hence need to use matching format for unpack. */ - format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); - + frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_UNORM); + if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { + assert(format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC); + frgba8_desc = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB); + } lp_build_unpack_rgba_soa(gallivm, - format_decompressed, + frgba8_desc, type, - packed, rgba_out); + packed, rgba); + /* + * We conv
Mesa (master): gallivm: optimize SoA AoS fallback fetch path a little
Module: Mesa Branch: master Commit: 5b950319ced820ee112f38f69b5694179c15815d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b950319ced820ee112f38f69b5694179c15815d Author: Roland Scheidegger Date: Wed Dec 21 04:23:55 2016 +0100 gallivm: optimize SoA AoS fallback fetch path a little We should do transpose, not extract/insert, at least with "sufficient" amount of channels (for 4 channels, extract/insert shuffles generated otherwise look truly terrifying). Albeit we shouldn't fallback to that so often in any case. v2: ditch the extract/insert path, not worth keeping (we're going to avoid hitting the fallback that often with future patches). Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 68 +++ 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index 389bfa0..a48d71f 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -40,6 +40,39 @@ #include "lp_bld_debug.h" #include "lp_bld_format.h" #include "lp_bld_arit.h" +#include "lp_bld_pack.h" + + +static void +convert_to_soa(struct gallivm_state *gallivm, + LLVMValueRef src_aos[LP_MAX_VECTOR_WIDTH / 32], + LLVMValueRef dst_soa[4], + const struct lp_type soa_type) +{ + unsigned j, k; + struct lp_type aos_channel_type = soa_type; + + LLVMValueRef aos_channels[4]; + unsigned pixels_per_channel = soa_type.length / 4; + + debug_assert((soa_type.length % 4) == 0); + + aos_channel_type.length >>= 1; + + for (j = 0; j < 4; ++j) { + LLVMValueRef channel[LP_MAX_VECTOR_LENGTH] = { 0 }; + + assert(pixels_per_channel <= LP_MAX_VECTOR_LENGTH); + + for (k = 0; k < pixels_per_channel; ++k) { + channel[k] = src_aos[j + 4 * k]; + } + + aos_channels[j] = lp_build_concat(gallivm, channel, aos_channel_type, pixels_per_channel); + } + + lp_build_transpose_aos(gallivm, soa_type, aos_channels, dst_soa); +} void @@ -48,9 +81,6 
@@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, const LLVMValueRef *unswizzled, LLVMValueRef swizzled_out[4]) { - assert(PIPE_SWIZZLE_0 == (int)PIPE_SWIZZLE_0); - assert(PIPE_SWIZZLE_1 == (int)PIPE_SWIZZLE_1); - if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { enum pipe_swizzle swizzle; LLVMValueRef depth_or_stencil; @@ -545,27 +575,30 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, */ { - unsigned k, chan; + unsigned k; struct lp_type tmp_type; + LLVMValueRef aos_fetch[LP_MAX_VECTOR_WIDTH / 32]; if (gallivm_debug & GALLIVM_DEBUG_PERF) { - debug_printf("%s: scalar unpacking of %s\n", + debug_printf("%s: AoS fetch fallback for %s\n", __FUNCTION__, format_desc->short_name); } tmp_type = type; tmp_type.length = 4; - for (chan = 0; chan < 4; ++chan) { - rgba_out[chan] = lp_build_undef(gallivm, type); - } + /* + * Note that vector transpose can be worse compared to insert/extract + * for aos->soa conversion (for formats with 1 or 2 channels). However, + * we should try to avoid getting here for just about all formats, so + * don't bother. + */ /* loop over number of pixels */ for(k = 0; k < type.length; ++k) { LLVMValueRef index = lp_build_const_int32(gallivm, k); LLVMValueRef offset_elem; LLVMValueRef i_elem, j_elem; - LLVMValueRef tmp; offset_elem = LLVMBuildExtractElement(builder, offset, index, ""); @@ -574,20 +607,11 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm, j_elem = LLVMBuildExtractElement(builder, j, index, ""); /* Get a single float[4]={R,G,B,A} pixel */ - tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, - aligned, base_ptr, offset_elem, - i_elem, j_elem, cache); + aos_fetch[k] = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type, +aligned, base_ptr, offset_elem, +i_elem, j_elem, cache); - /* - * Insert the AoS tmp value channels into the SoA result vectors at - * position = 'index'. 
- */ - for (chan = 0; chan < 4; ++chan) { -LLVMValueRef chan_val = lp_build_const_int32(gallivm, chan), -tmp_chan = LLVMBuildExtractElement(builder, tmp, chan_val, ""); -rgba_out[chan] = LLVMBuildInsertElement(builder, rgba_out[chan], -tmp_chan, index, "")
Mesa (master): gallivm: provide soa fetch path handling formats with more than 32bit
Module: Mesa Branch: master Commit: 3c98e3cd63012246346e6054c5c16d368f899062 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c98e3cd63012246346e6054c5c16d368f899062 Author: Roland Scheidegger Date: Sun Dec 11 23:41:07 2016 +0100 gallivm: provide soa fetch path handling formats with more than 32bit This previously always fell back to AoS conversion. Even for 4-float formats (which is the optimal case by far for that fallback case) this was suboptimal, since it meant the conversion couldn't be done with 256bit vectors. While this may still only be partly possible for some formats, (unless there's AVX2 support) at least the transpose can be done with half the unpacks (and before using the transpose for AoS fallbacks, it was worse still). With less than 4 channels, things got way worse with the AoS fallback quickly even with 128bit vectors. The strategy is pretty much the same as the existing one for formats which fit into 32 bits, except there's now multiple vectors to be fetched (2 or 4 to be exact), which need to be shuffled first (if it's 4 vectors, this amounts to a transpose, for 2 it's a bit different), then the unpack is done the same (with the exception that the shift of the channels is now modulo 32, and we need to select the right vector). In fact the most complex part about it is to get the shuffles right for separating into lo/hi parts for AVX/AVX2... This also makes use of the new ability of gather to use provided type information, which we abuse to outsmart llvm so we get decent shuffles, and to fetch 3x32bit vectors without having to ZExt the scalar. And just because we can, we handle double formats too, albeit they are a bit different (draw sometimes needs to handle that). v2: fix typo float/int bug (generating inefficient code). 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 529 +++--- 1 file changed, 375 insertions(+), 154 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c index b3bc155..34da869 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c @@ -31,6 +31,7 @@ #include "util/u_format.h" #include "util/u_memory.h" #include "util/u_string.h" +#include "util/u_math.h" #include "lp_bld_type.h" #include "lp_bld_const.h" @@ -113,6 +114,166 @@ lp_build_format_swizzle_soa(const struct util_format_description *format_desc, } + +static LLVMValueRef +lp_build_extract_soa_chan(struct lp_build_context *bld, + unsigned blockbits, + boolean srgb_chan, + struct util_format_channel_description chan_desc, + LLVMValueRef packed) +{ + struct gallivm_state *gallivm = bld->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type type = bld->type; + LLVMValueRef input = packed; + const unsigned width = chan_desc.size; + const unsigned start = chan_desc.shift; + const unsigned stop = start + width; + + /* Decode the input vector component */ + + switch(chan_desc.type) { + case UTIL_FORMAT_TYPE_VOID: + input = bld->undef; + break; + + case UTIL_FORMAT_TYPE_UNSIGNED: + /* + * Align the LSB + */ + if (start) { + input = LLVMBuildLShr(builder, input, + lp_build_const_int_vec(gallivm, type, start), ""); + } + + /* + * Zero the MSBs + */ + if (stop < blockbits) { + unsigned mask = ((unsigned long long)1 << width) - 1; + input = LLVMBuildAnd(builder, input, + lp_build_const_int_vec(gallivm, type, mask), ""); + } + + /* + * Type conversion + */ + if (type.floating) { + if (srgb_chan) { +struct lp_type conv_type = lp_uint_type(type); +input = lp_build_srgb_to_linear(gallivm, conv_type, width, input); + } + else { +if(chan_desc.normalized) + input = lp_build_unsigned_norm_to_float(gallivm, width, type, input); +else + input = 
LLVMBuildSIToFP(builder, input, bld->vec_type, ""); + } + } + else if (chan_desc.pure_integer) { + /* Nothing to do */ + } else { + /* FIXME */ + assert(0); + } + break; + + case UTIL_FORMAT_TYPE_SIGNED: + /* + * Align the sign bit first. + */ + if (stop < type.width) { + unsigned bits = type.width - stop; + LLVMValueRef bits_val = lp_build_const_int_vec(gallivm, type, bits); + input = LLVMBuildShl(builder, input, bits_val, ""); + } + + /* + * Align the LSB (with an arithmetic shift to preserve the sign) + */ + if (chan_desc.size < type.width) { + unsigned bits = type.width - chan_desc.size; + LLVMValueRef bits_val = lp_buil
Mesa (master): gallivm: optimize gather a bit, by using supplied destination type
Module: Mesa Branch: master Commit: 8bd67a35c50e68c21aed043de11e095c284d151a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bd67a35c50e68c21aed043de11e095c284d151a Author: Roland Scheidegger Date: Sun Dec 11 23:39:22 2016 +0100 gallivm: optimize gather a bit, by using supplied destination type By using a dst_type in the gather interface, gather has some more knowledge about how values should be fetched. E.g. if this is a 3x32bit fetch and dst_type is a 4x32bit vector, gather will no longer do a ZExt with a 96bit scalar value to 128bit, but just fetch the 96bit as 3x32bit vector (this is still going to be 2 loads of course, but the loads can be done directly to simd vector that way). Also, we can now try to use the right int/float type. This should make no difference really since there are typically no domain transition penalties for such simd loads, however it actually makes a difference since llvm will use different shuffle lowering afterwards so the caller can use this to trick llvm into using sane shuffle afterwards (and yes llvm is really stupid there - nothing against using the shuffle instruction from the correct domain, but not at the cost of doing 3 times more shuffles, the case which actually matters is refusal to use shufps for integer values). Also attempt to avoid things which look great on paper but llvm doesn't really handle (e.g. fetching 3-element 8 bit and 16 bit vectors which is simply disastrous - I suspect type legalizer is to blame trying to extend these vectors to 128bit types somehow, so fetching these with scalars like before which is suboptimal due to the ZExt). Remove the ability for truncation (no point, this is gather, not conversion) as it is complex enough already. While here also implement not just the float, but also the 64bit avx2 gathers (disabled though since based on the theoretical numbers the benefit just isn't there at all until Skylake at least). 
Reviewed-by: Jose Fonseca --- src/gallium/auxiliary/draw/draw_llvm.c | 2 +- src/gallium/auxiliary/gallivm/lp_bld_format_aos.c | 5 +- .../auxiliary/gallivm/lp_bld_format_aos_array.c| 9 +- src/gallium/auxiliary/gallivm/lp_bld_format_soa.c | 23 +- src/gallium/auxiliary/gallivm/lp_bld_format_yuv.c | 4 +- src/gallium/auxiliary/gallivm/lp_bld_gather.c | 359 + src/gallium/auxiliary/gallivm/lp_bld_gather.h | 2 +- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 8 +- 8 files changed, 333 insertions(+), 79 deletions(-) diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index c548572..19b75a5 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -1864,7 +1864,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMPointerType(LLVMInt8TypeInContext(context), 0), ""); tmp = lp_build_gather(gallivm, vs_type.length, - 32, 32, TRUE, + 32, bld.type, TRUE, fetch_elts, tmp, FALSE); LLVMBuildStore(builder, tmp, index_store); } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c index 9f6b9e9..322e7b8 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c @@ -224,6 +224,7 @@ lp_build_unpack_arith_rgba_aos(struct gallivm_state *gallivm, /* Ex: convert packed = {XYZW, XYZW, XYZW, XYZW} * into masked = {X, Y, Z, W} */ + /* Note: we cannot do this shift on x86 natively until AVX2. 
*/ shifted = LLVMBuildLShr(builder, packed, LLVMConstVector(shifts, 4), ""); masked = LLVMBuildAnd(builder, shifted, LLVMConstVector(masks, 4), ""); @@ -394,6 +395,7 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, util_is_power_of_two(format_desc->block.bits)) { LLVMValueRef packed; LLVMTypeRef dst_vec_type = lp_build_vec_type(gallivm, type); + struct lp_type fetch_type; unsigned vec_len = type.width * type.length; /* @@ -401,8 +403,9 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm, * scaling or converting. */ + fetch_type = lp_type_uint(type.width*4); packed = lp_build_gather(gallivm, type.length/4, - format_desc->block.bits, type.width*4, + format_desc->block.bits, fetch_type, aligned, base_ptr, offset, TRUE); assert(format_desc->block.bits <= vec_len); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c b/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c index 8cad3a6..636a4a6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos_array.c +++ b/src/gallium
Mesa (master): svga: Fix a strict-aliasing violation in shader dumper
Module: Mesa Branch: master Commit: 8801734da701d95608e39d829e0a31a191ee68f2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8801734da701d95608e39d829e0a31a191ee68f2 Author: Edward O'Callaghan Date: Tue Dec 6 11:28:56 2016 +1100 svga: Fix a strict-aliasing violation in shader dumper As per the C spec, it is illegal to alias pointers to different types. This results in undefined behaviour after optimization passes, resulting in very subtle bugs that happen only on a full moon.. Use a memcpy() as a well defined coercion between the isomorphic bit-field interpretations of memory. V.2: Use C99 compat STATIC_ASSERT() over C11 static_assert(). Signed-off-by: Edward O'Callaghan Reviewed-by: Charmaine Lee --- src/gallium/drivers/svga/svgadump/svga_shader_dump.c | 10 +- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c index 4ee1bf2..46126a5 100644 --- a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c +++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c @@ -30,6 +30,9 @@ * @author Michal Krol */ +#include +#include + #include "svga_shader.h" #include "svga_shader_dump.h" #include "svga_shader_op.h" @@ -413,6 +416,11 @@ dump_dstreg(struct sh_dstreg dstreg, static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di ) { + struct sh_reg srcreg_sh = {0}; + /* bit-fields carefully aligned, ensure they stay that way. */ + STATIC_ASSERT(sizeof(struct sh_reg) == sizeof(struct sh_srcreg)); + memcpy(&srcreg_sh, &srcreg, sizeof(srcreg_sh)); + switch (srcreg.modifier) { case SVGA3DSRCMOD_NEG: case SVGA3DSRCMOD_BIASNEG: @@ -427,7 +435,7 @@ static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, cons case SVGA3DSRCMOD_NOT: _debug_printf( "!" 
); } - dump_reg( *(struct sh_reg *) &srcreg, indreg, di ); + dump_reg(srcreg_sh, indreg, di ); switch (srcreg.modifier) { case SVGA3DSRCMOD_NONE: case SVGA3DSRCMOD_NEG: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit