Mesa (master): radeonsi: Cache LLVMTargetMachineRef in context instead of in screen
Module: Mesa Branch: master Commit: d64adc3a79e419062432cfa8d1cbc437676a3fbd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d64adc3a79e419062432cfa8d1cbc437676a3fbd Author: Michel Dänzer michel.daen...@amd.com Date: Thu Mar 26 11:32:59 2015 +0900 radeonsi: Cache LLVMTargetMachineRef in context instead of in screen Fixes a crash in genymotion with several threads compiling shaders concurrently. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89746 Cc: 10.5 mesa-sta...@lists.freedesktop.org Reviewed-by: Tom Stellard thomas.stell...@amd.com --- src/gallium/drivers/radeonsi/si_compute.c |3 +- src/gallium/drivers/radeonsi/si_pipe.c | 43 --- src/gallium/drivers/radeonsi/si_pipe.h |3 +- src/gallium/drivers/radeonsi/si_shader.c| 13 --- src/gallium/drivers/radeonsi/si_shader.h|5 +-- src/gallium/drivers/radeonsi/si_state_shaders.c |4 ++- 6 files changed, 41 insertions(+), 30 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 8609b89..89bef2e 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -130,7 +130,8 @@ static void *si_create_compute_state( for (i = 0; i program-num_kernels; i++) { LLVMModuleRef mod = radeon_llvm_get_kernel_module(program-llvm_ctx, i, code, header-num_bytes); - si_compile_llvm(sctx-screen, program-kernels[i], mod); + si_compile_llvm(sctx-screen, program-kernels[i], sctx-tm, + mod); LLVMDisposeModule(mod); } } diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index d335bda..0eada72 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -69,6 +69,11 @@ static void si_destroy_context(struct pipe_context *context) si_pm4_cleanup(sctx); r600_common_context_cleanup(sctx-b); + +#if HAVE_LLVM = 0x0306 + LLVMDisposeTargetMachine(sctx-tm); +#endif + FREE(sctx); } @@ -77,6 +82,12 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void * struct si_context *sctx = CALLOC_STRUCT(si_context); struct si_screen* sscreen = (struct si_screen *)screen; struct radeon_winsys *ws = sscreen-b.ws; + LLVMTargetRef r600_target; +#if HAVE_LLVM = 0x0306 + const char *triple = amdgcn--; +#else + const char *triple = r600--; +#endif int shader, i; if (sctx == NULL) @@ -170,6 +181,17 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * */ sctx-scratch_waves = 32 * sscreen-b.info.max_compute_units; +#if HAVE_LLVM = 0x0306 + /* Initialize LLVM TargetMachine */ + r600_target = radeon_llvm_get_r600_target(triple); + sctx-tm = LLVMCreateTargetMachine(r600_target, triple, + r600_get_llvm_processor_name(sscreen-b.family), + +DumpCode,+vgpr-spilling, + LLVMCodeGenLevelDefault, + LLVMRelocDefault, + LLVMCodeModelDefault); +#endif + return sctx-b.b; fail: si_destroy_context(sctx-b.b); @@ -445,12 +467,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen) if (!sscreen-b.ws-unref(sscreen-b.ws)) return; -#if HAVE_LLVM = 0x0306 - // r600_destroy_common_screen() frees sscreen, so we need to make - // sure to dispose the TargetMachine before we call it. - LLVMDisposeTargetMachine(sscreen-tm); -#endif - r600_destroy_common_screen(sscreen-b); } @@ -508,12 +524,7 @@ static bool si_initialize_pipe_config(struct si_screen *sscreen) struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) { struct si_screen *sscreen = CALLOC_STRUCT(si_screen); - LLVMTargetRef r600_target; -#if HAVE_LLVM = 0x0306 - const char *triple = amdgcn--; -#else - const char *triple = r600--; -#endif + if (sscreen == NULL) { return NULL; } @@ -541,13 +552,5 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws) /* Create the auxiliary context. This must be done last. 
*/ sscreen-b.aux_context = sscreen-b.b.context_create(sscreen-b.b, NULL); -#if HAVE_LLVM = 0x0306 - /* Initialize LLVM TargetMachine */ - r600_target = radeon_llvm_get_r600_target(triple); - sscreen-tm = LLVMCreateTargetMachine(r600_target, triple, - r600_get_llvm_processor_name(sscreen-b.family), -
Mesa (master): glsl: fail when a shader's input var has not an equivalent out var in previous
Module: Mesa Branch: master Commit: 18004c338f6be8af2e36d2f54972c60136229aeb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=18004c338f6be8af2e36d2f54972c60136229aeb Author: Samuel Iglesias Gonsalvez sigles...@igalia.com Date: Fri Nov 28 11:23:20 2014 +0100 glsl: fail when a shader's input var has not an equivalent out var in previous GLSL ES 3.00 spec, 4.3.10 (Linking of Vertex Outputs and Fragment Inputs), page 45 says the following: The type of vertex outputs and fragment input with the same name must match, otherwise the link command will fail. The precision does not need to match. Only those fragment inputs statically used (i.e. read) in the fragment shader must be declared as outputs in the vertex shader; declaring superfluous vertex shader outputs is permissible. [...] The term static use means that after preprocessing the shader includes at least one statement that accesses the input or output, even if that statement is never actually executed. And it includes a table with all the possibilities. Similar table or content is present in other GLSL specs: GLSL 4.40, GLSL 1.50, etc but for more stages (vertex and geometry shaders, etc). This patch detects that case and returns a link error. It fixes the following dEQP test: dEQP-GLES3.functional.shaders.linkage.varying.rules.illegal_usage_1 However, it adds a new regression in piglit because the test hasn't a vertex shader and it checks the link status. bin/glslparsertest \ tests/spec/glsl-1.50/compiler/gs-also-uses-smooth-flat-noperspective.geom pass \ 1.50 --check-link This piglit test is wrong according to the spec wording above, so if this patch is merged it should be updated. 
Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com Reviewed-by: Ben Widawsky b...@bwidawsk.net --- src/glsl/link_varyings.cpp | 13 + 1 file changed, 13 insertions(+) diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp index 2261799..d6fb1ea 100644 --- a/src/glsl/link_varyings.cpp +++ b/src/glsl/link_varyings.cpp @@ -263,6 +263,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program *prog, if (output != NULL) { cross_validate_types_and_qualifiers(prog, input, output, consumer->Stage, producer->Stage); + } else { +/* Check for input vars with unmatched output vars in prev stage + * taking into account that interface blocks could have a matching + * output but with different name, so we ignore them. + */ +assert(!input->data.assigned); +if (input->data.used && !input->get_interface_type() && +!input->data.explicit_location) + linker_error(prog, +"%s shader input `%s' " +"has no matching output in the previous stage\n", +_mesa_shader_stage_to_string(consumer->Stage), +input->name); } } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: allow ForceGLSLVersion to override #version directives
Module: Mesa Branch: master Commit: dbe67d76e0487b04a7b6081d9d4db3c3ee3e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbe67d76e0487b04a7b6081d9d4db3c3ee3e Author: Brian Paul bri...@vmware.com Date: Fri Mar 27 10:54:10 2015 -0600 glsl: allow ForceGLSLVersion to override #version directives Previously, the ctx-Const.ForceGLSLVersion setting only worked if the shader lacked a #version directive. Now, the ForceGLSLVersion setting will override the #version directive too. This change should be safe since it should be rare to have an app that has a mix of shader versions and we only wanted to override the #version for shaders which lacked the #version directive. Reviewed-by: Ilia Mirkin imir...@alum.mit.edu --- src/glsl/glsl_parser_extras.cpp | 11 +++ src/glsl/glsl_parser_extras.h |1 + src/mesa/main/mtypes.h |4 ++-- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index 79624bc..0aa3c54 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -73,8 +73,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this-uses_builtin_functions = false; /* Set default language version and extensions */ - this-language_version = ctx-Const.ForceGLSLVersion ? 
-ctx-Const.ForceGLSLVersion : 110; + this-language_version = 110; + this-forced_language_version = ctx-Const.ForceGLSLVersion; this-es_shader = false; this-ARB_texture_rectangle_enable = true; @@ -320,11 +320,14 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, this-ARB_texture_rectangle_enable = false; } - this-language_version = version; + if (this-forced_language_version) + this-language_version = this-forced_language_version; + else + this-language_version = version; bool supported = false; for (unsigned i = 0; i this-num_supported_versions; i++) { - if (this-supported_versions[i].ver == (unsigned) version + if (this-supported_versions[i].ver == this-language_version this-supported_versions[i].es == this-es_shader) { supported = true; break; diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h index 0975c86..1f5478b 100644 --- a/src/glsl/glsl_parser_extras.h +++ b/src/glsl/glsl_parser_extras.h @@ -226,6 +226,7 @@ struct _mesa_glsl_parse_state { bool es_shader; unsigned language_version; + unsigned forced_language_version; gl_shader_stage stage; /** diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 8e1dba6..f718768 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3527,8 +3527,8 @@ struct gl_constants GLboolean ForceGLSLExtensionsWarn; /** -* If non-zero, forces GLSL shaders without the #version directive to behave -* as if they began with #version ForceGLSLVersion. +* If non-zero, forces GLSL shaders to behave as if they began +* with #version ForceGLSLVersion. */ GLuint ForceGLSLVersion; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't bother masking out the low 24 bits for integer multiplies
Module: Mesa Branch: master Commit: c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d Author: Eric Anholt e...@anholt.net Date: Sun Mar 29 21:26:16 2015 -0700 vc4: Don't bother masking out the low 24 bits for integer multiplies The hardware just uses the low 24 lines, saving us an AND to drop the high bits. total uniforms in shared programs: 13433 - 13423 (-0.07%) uniforms in affected programs: 356 - 346 (-2.81%) total instructions in shared programs: 40003 - 39989 (-0.03%) instructions in affected programs: 910 - 896 (-1.54%) --- src/gallium/drivers/vc4/vc4_program.c | 20 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 49b9466..9e145e5 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst, enum qop op, struct qreg *src, int i) { -struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i], - qir_uniform_ui(c, 24)); -struct qreg src0_lo = qir_AND(c, src[0 * 4 + i], - qir_uniform_ui(c, 0xff)); -struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i], - qir_uniform_ui(c, 24)); -struct qreg src1_lo = qir_AND(c, src[1 * 4 + i], - qir_uniform_ui(c, 0xff)); - -struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo); -struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi); -struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo); +struct qreg src0 = src[0 * 4 + i]; +struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24)); +struct qreg src1 = src[1 * 4 + i]; +struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24)); + +struct qreg hilo = qir_MUL24(c, src0_hi, src1); +struct qreg lohi = qir_MUL24(c, src0, src1_hi); +struct qreg lolo = qir_MUL24(c, src0, src1); return qir_ADD(c, lolo, qir_SHL(c, qir_ADD(c, hilo, lohi), ___ mesa-commit mailing list 
mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Make integer multiply use 24 bits for the low parts.
Module: Mesa Branch: master Commit: 5df8bf86fe40ae95ad3888cb167ce80c710af227 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5df8bf86fe40ae95ad3888cb167ce80c710af227 Author: Eric Anholt e...@anholt.net Date: Sun Mar 29 21:21:10 2015 -0700 vc4: Make integer multiply use 24 bits for the low parts. The hardware uses the low 24 bits in integer multiplies, so we can have fewer high bits (and so probably drop them more frequently). --- src/gallium/drivers/vc4/vc4_program.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 56a3a96..49b9466 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -367,13 +367,13 @@ tgsi_to_qir_umul(struct vc4_compile *c, enum qop op, struct qreg *src, int i) { struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i], - qir_uniform_ui(c, 16)); + qir_uniform_ui(c, 24)); struct qreg src0_lo = qir_AND(c, src[0 * 4 + i], - qir_uniform_ui(c, 0x)); + qir_uniform_ui(c, 0xff)); struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i], - qir_uniform_ui(c, 16)); + qir_uniform_ui(c, 24)); struct qreg src1_lo = qir_AND(c, src[1 * 4 + i], - qir_uniform_ui(c, 0x)); + qir_uniform_ui(c, 0xff)); struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo); struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi); @@ -381,7 +381,7 @@ tgsi_to_qir_umul(struct vc4_compile *c, return qir_ADD(c, lolo, qir_SHL(c, qir_ADD(c, hilo, lohi), -qir_uniform_ui(c, 16))); +qir_uniform_ui(c, 24))); } static struct qreg ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Drop integer multiplies with 0 to moves of 0.
Module: Mesa Branch: master Commit: 1dcc1ee314a6907213e2abd5337ec0bbba3bd1bf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dcc1ee314a6907213e2abd5337ec0bbba3bd1bf Author: Eric Anholt e...@anholt.net Date: Mon Mar 30 10:44:28 2015 -0700 vc4: Drop integer multiplies with 0 to moves of 0. This cleans up more instructions generated by uniform array indexing multiplies. total instructions in shared programs: 39989 - 39961 (-0.07%) instructions in affected programs: 896 - 868 (-3.12%) --- src/gallium/drivers/vc4/vc4_opt_algebraic.c |8 1 file changed, 8 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index d17669a..e40e0f3 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -248,6 +248,14 @@ qir_opt_algebraic(struct vc4_compile *c) } break; +case QOP_MUL24: +if (replace_x_0_with_0(c, inst, 0) || +replace_x_0_with_0(c, inst, 1)) { +progress = true; +break; +} +break; + case QOP_AND: if (replace_x_0_with_0(c, inst, 0) || replace_x_0_with_0(c, inst, 1)) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add a constant folding pass.
Module: Mesa Branch: master Commit: 8c5dcdbccb68b73d2856d9c1faafadc536e682e3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c5dcdbccb68b73d2856d9c1faafadc536e682e3 Author: Eric Anholt e...@anholt.net Date: Mon Mar 30 10:38:21 2015 -0700 vc4: Add a constant folding pass. This cleans up some pointless operations generated by the in-driver mul24 lowering (commonly generated by making a vec4 index for a matrix in a uniform array). I could fill in other operations, but pretty much anything else ought to be getting handled at the NIR level, I think. total uniforms in shared programs: 13423 - 13421 (-0.01%) uniforms in affected programs: 346 - 344 (-0.58%) --- src/gallium/drivers/vc4/Makefile.sources |1 + src/gallium/drivers/vc4/vc4_opt_constant_folding.c | 110 src/gallium/drivers/vc4/vc4_qir.c |1 + src/gallium/drivers/vc4/vc4_qir.h |1 + 4 files changed, 113 insertions(+) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index c7254ea..ec0f25c 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -12,6 +12,7 @@ C_SOURCES := \ vc4_fence.c \ vc4_formats.c \ vc4_opt_algebraic.c \ + vc4_opt_constant_folding.c \ vc4_opt_copy_propagation.c \ vc4_opt_cse.c \ vc4_opt_dead_code.c \ diff --git a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c new file mode 100644 index 000..ac9be5c --- /dev/null +++ b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c @@ -0,0 +1,110 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject 
to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file vc4_opt_constant_folding.c + * + * Simple constant folding pass to clean up operations on only constants, + * which we might have generated within vc4_program.c. + */ + +#include vc4_qir.h +#include util/u_math.h + +static bool debug; + +static void +dump_from(struct vc4_compile *c, struct qinst *inst) +{ +if (!debug) +return; + +fprintf(stderr, optimizing: ); +qir_dump_inst(c, inst); +fprintf(stderr, \n); +} + +static void +dump_to(struct vc4_compile *c, struct qinst *inst) +{ +if (!debug) +return; + +fprintf(stderr, to: ); +qir_dump_inst(c, inst); +fprintf(stderr, \n); +} + +static bool +constant_fold(struct vc4_compile *c, struct qinst *inst) +{ +int nsrc = qir_get_op_nsrc(inst-op); +uint32_t ui[nsrc]; + +for (int i = 0; i nsrc; i++) { +struct qreg reg = inst-src[i]; +if (reg.file == QFILE_UNIF +c-uniform_contents[reg.index] == QUNIFORM_CONSTANT) { +ui[i] = c-uniform_data[reg.index]; +} else if (reg.file == QFILE_SMALL_IMM) { +ui[i] = reg.index; +} else { +return false; +} +} + +uint32_t result = 0; +switch (inst-op) { +case QOP_SHR: +result = ui[0] ui[1]; +break; + +default: +return false; +} + +dump_from(c, inst); + +inst-src[0] = qir_uniform_ui(c, result); +for (int i = 1; i nsrc; i++) +inst-src[i] = c-undef; +inst-op = QOP_MOV; + +dump_to(c, inst); +return 
true; +} + +bool +qir_opt_constant_folding(struct vc4_compile *c) +{ +bool progress = false; +struct simple_node *node; + +foreach(node, c-instructions) { +struct qinst *inst = (struct qinst *)node; +if (constant_fold(c, inst)) +progress = true; +} + +return progress; +} diff --git