Mesa (master): radeonsi: Cache LLVMTargetMachineRef in context instead of in screen

2015-03-30 Thread Michel Dänzer
Module: Mesa
Branch: master
Commit: d64adc3a79e419062432cfa8d1cbc437676a3fbd
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d64adc3a79e419062432cfa8d1cbc437676a3fbd

Author: Michel Dänzer michel.daen...@amd.com
Date:   Thu Mar 26 11:32:59 2015 +0900

radeonsi: Cache LLVMTargetMachineRef in context instead of in screen

Fixes a crash in genymotion with several threads compiling shaders
concurrently.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89746

Cc: 10.5 mesa-sta...@lists.freedesktop.org
Reviewed-by: Tom Stellard thomas.stell...@amd.com

---

 src/gallium/drivers/radeonsi/si_compute.c   |3 +-
 src/gallium/drivers/radeonsi/si_pipe.c  |   43 ---
 src/gallium/drivers/radeonsi/si_pipe.h  |3 +-
 src/gallium/drivers/radeonsi/si_shader.c|   13 ---
 src/gallium/drivers/radeonsi/si_shader.h|5 +--
 src/gallium/drivers/radeonsi/si_state_shaders.c |4 ++-
 6 files changed, 41 insertions(+), 30 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 8609b89..89bef2e 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -130,7 +130,8 @@ static void *si_create_compute_state(
for (i = 0; i < program->num_kernels; i++) {
LLVMModuleRef mod = 
radeon_llvm_get_kernel_module(program->llvm_ctx, i,
 code, 
header->num_bytes);
-   si_compile_llvm(sctx->screen, &program->kernels[i], 
mod);
+   si_compile_llvm(sctx->screen, &program->kernels[i], 
sctx->tm,
+   mod);
LLVMDisposeModule(mod);
}
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index d335bda..0eada72 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -69,6 +69,11 @@ static void si_destroy_context(struct pipe_context *context)
si_pm4_cleanup(sctx);
 
r600_common_context_cleanup(&sctx->b);
+
+#if HAVE_LLVM >= 0x0306
+   LLVMDisposeTargetMachine(sctx->tm);
+#endif
+
FREE(sctx);
 }
 
@@ -77,6 +82,12 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void *
struct si_context *sctx = CALLOC_STRUCT(si_context);
struct si_screen* sscreen = (struct si_screen *)screen;
struct radeon_winsys *ws = sscreen->b.ws;
+   LLVMTargetRef r600_target;
+#if HAVE_LLVM >= 0x0306
+   const char *triple = "amdgcn--";
+#else
+   const char *triple = "r600--";
+#endif
int shader, i;
 
if (sctx == NULL)
@@ -170,6 +181,17 @@ static struct pipe_context *si_create_context(struct 
pipe_screen *screen, void *
 */
sctx->scratch_waves = 32 * sscreen->b.info.max_compute_units;
 
+#if HAVE_LLVM >= 0x0306
+   /* Initialize LLVM TargetMachine */
+   r600_target = radeon_llvm_get_r600_target(triple);
+   sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
+  
r600_get_llvm_processor_name(sscreen->b.family),
+  "+DumpCode,+vgpr-spilling",
+  LLVMCodeGenLevelDefault,
+  LLVMRelocDefault,
+  LLVMCodeModelDefault);
+#endif
+
return &sctx->b.b;
 fail:
si_destroy_context(&sctx->b.b);
@@ -445,12 +467,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
if (!sscreen->b.ws->unref(sscreen->b.ws))
return;
 
-#if HAVE_LLVM >= 0x0306
-   // r600_destroy_common_screen() frees sscreen, so we need to make
-   // sure to dispose the TargetMachine before we call it.
-   LLVMDisposeTargetMachine(sscreen->tm);
-#endif
-
r600_destroy_common_screen(&sscreen->b);
 }
 
@@ -508,12 +524,7 @@ static bool si_initialize_pipe_config(struct si_screen 
*sscreen)
 struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 {
struct si_screen *sscreen = CALLOC_STRUCT(si_screen);
-   LLVMTargetRef r600_target;
-#if HAVE_LLVM >= 0x0306
-   const char *triple = "amdgcn--";
-#else
-   const char *triple = "r600--";
-#endif
+
if (sscreen == NULL) {
return NULL;
}
@@ -541,13 +552,5 @@ struct pipe_screen *radeonsi_screen_create(struct 
radeon_winsys *ws)
/* Create the auxiliary context. This must be done last. */
sscreen->b.aux_context = sscreen->b.b.context_create(&sscreen->b.b, 
NULL);
 
-#if HAVE_LLVM >= 0x0306
-   /* Initialize LLVM TargetMachine */
-   r600_target = radeon_llvm_get_r600_target(triple);
-   sscreen->tm = LLVMCreateTargetMachine(r600_target, triple,
-   r600_get_llvm_processor_name(sscreen->b.family),
-   

Mesa (master): glsl: fail when a shader's input var has not an equivalent out var in previous

2015-03-30 Thread Samuel Iglesias Gonsálvez
Module: Mesa
Branch: master
Commit: 18004c338f6be8af2e36d2f54972c60136229aeb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=18004c338f6be8af2e36d2f54972c60136229aeb

Author: Samuel Iglesias Gonsalvez sigles...@igalia.com
Date:   Fri Nov 28 11:23:20 2014 +0100

glsl: fail when a shader's input var has not an equivalent out var in previous

GLSL ES 3.00 spec, 4.3.10 (Linking of Vertex Outputs and Fragment Inputs),
page 45 says the following:

"The type of vertex outputs and fragment input with the same name must match,
otherwise the link command will fail. The precision does not need to match.
Only those fragment inputs statically used (i.e. read) in the fragment shader
must be declared as outputs in the vertex shader; declaring superfluous vertex
shader outputs is permissible."
[...]
"The term static use means that after preprocessing the shader includes at
least one statement that accesses the input or output, even if that statement
is never actually executed."

And it includes a table with all the possibilities.

Similar table or content is present in other GLSL specs: GLSL 4.40, GLSL 1.50,
etc but for more stages (vertex and geometry shaders, etc).

This patch detects that case and returns a link error. It fixes the following
dEQP test:

  dEQP-GLES3.functional.shaders.linkage.varying.rules.illegal_usage_1

However, it adds a new regression in piglit because the test hasn't a
vertex shader and it checks the link status.

bin/glslparsertest \
tests/spec/glsl-1.50/compiler/gs-also-uses-smooth-flat-noperspective.geom pass \
1.50 --check-link

This piglit test is wrong according to the spec wording above, so if this patch
is merged it should be updated.

Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com
Reviewed-by: Ben Widawsky b...@bwidawsk.net

---

 src/glsl/link_varyings.cpp |   13 +
 1 file changed, 13 insertions(+)

diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp
index 2261799..d6fb1ea 100644
--- a/src/glsl/link_varyings.cpp
+++ b/src/glsl/link_varyings.cpp
@@ -263,6 +263,19 @@ cross_validate_outputs_to_inputs(struct gl_shader_program 
*prog,
  if (output != NULL) {
 cross_validate_types_and_qualifiers(prog, input, output,
 consumer->Stage, 
producer->Stage);
+ } else {
+/* Check for input vars with unmatched output vars in prev stage
+ * taking into account that interface blocks could have a matching
+ * output but with different name, so we ignore them.
+ */
+assert(!input->data.assigned);
+if (input->data.used && !input->get_interface_type() &&
+!input->data.explicit_location)
+   linker_error(prog,
+"%s shader input `%s' "
+"has no matching output in the previous stage\n",
+_mesa_shader_stage_to_string(consumer->Stage),
+input->name);
  }
   }
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: allow ForceGLSLVersion to override #version directives

2015-03-30 Thread Brian Paul
Module: Mesa
Branch: master
Commit: dbe67d76e0487b04a7b6081d9d4db3c3ee3e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dbe67d76e0487b04a7b6081d9d4db3c3ee3e

Author: Brian Paul bri...@vmware.com
Date:   Fri Mar 27 10:54:10 2015 -0600

glsl: allow ForceGLSLVersion to override #version directives

Previously, the ctx->Const.ForceGLSLVersion setting only worked if
the shader lacked a #version directive.  Now, the ForceGLSLVersion
setting will override the #version directive too.

This change should be safe since it should be rare to have an app
that has a mix of shader versions and we only wanted to override
the #version for shaders which lacked the #version directive.

Reviewed-by: Ilia Mirkin imir...@alum.mit.edu

---

 src/glsl/glsl_parser_extras.cpp |   11 +++
 src/glsl/glsl_parser_extras.h   |1 +
 src/mesa/main/mtypes.h  |4 ++--
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp
index 79624bc..0aa3c54 100644
--- a/src/glsl/glsl_parser_extras.cpp
+++ b/src/glsl/glsl_parser_extras.cpp
@@ -73,8 +73,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct 
gl_context *_ctx,
this->uses_builtin_functions = false;
 
/* Set default language version and extensions */
-   this->language_version = ctx->Const.ForceGLSLVersion ?
-ctx->Const.ForceGLSLVersion : 110;
+   this->language_version = 110;
+   this->forced_language_version = ctx->Const.ForceGLSLVersion;
this->es_shader = false;
this->ARB_texture_rectangle_enable = true;
 
@@ -320,11 +320,14 @@ _mesa_glsl_parse_state::process_version_directive(YYLTYPE 
*locp, int version,
   this->ARB_texture_rectangle_enable = false;
}
 
-   this->language_version = version;
+   if (this->forced_language_version)
+  this->language_version = this->forced_language_version;
+   else
+  this->language_version = version;
 
bool supported = false;
for (unsigned i = 0; i < this->num_supported_versions; i++) {
-  if (this->supported_versions[i].ver == (unsigned) version
+  if (this->supported_versions[i].ver == this->language_version
&& this->supported_versions[i].es == this->es_shader) {
  supported = true;
  break;
diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h
index 0975c86..1f5478b 100644
--- a/src/glsl/glsl_parser_extras.h
+++ b/src/glsl/glsl_parser_extras.h
@@ -226,6 +226,7 @@ struct _mesa_glsl_parse_state {
 
bool es_shader;
unsigned language_version;
+   unsigned forced_language_version;
gl_shader_stage stage;
 
/**
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 8e1dba6..f718768 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -3527,8 +3527,8 @@ struct gl_constants
GLboolean ForceGLSLExtensionsWarn;
 
/**
-* If non-zero, forces GLSL shaders without the #version directive to behave
-* as if they began with #version ForceGLSLVersion.
+* If non-zero, forces GLSL shaders to behave as if they began
+* with #version ForceGLSLVersion.
 */
GLuint ForceGLSLVersion;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Don't bother masking out the low 24 bits for integer multiplies

2015-03-30 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d

Author: Eric Anholt e...@anholt.net
Date:   Sun Mar 29 21:26:16 2015 -0700

vc4: Don't bother masking out the low 24 bits for integer multiplies

The hardware just uses the low 24 lines, saving us an AND to drop the high
bits.

total uniforms in shared programs: 13433 -> 13423 (-0.07%)
uniforms in affected programs: 356 -> 346 (-2.81%)
total instructions in shared programs: 40003 -> 39989 (-0.03%)
instructions in affected programs: 910 -> 896 (-1.54%)

---

 src/gallium/drivers/vc4/vc4_program.c |   20 
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 49b9466..9e145e5 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c,
  struct tgsi_full_instruction *tgsi_inst,
  enum qop op, struct qreg *src, int i)
 {
-struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-  qir_uniform_ui(c, 24));
-struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-  qir_uniform_ui(c, 0xffffff));
-struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-  qir_uniform_ui(c, 24));
-struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-  qir_uniform_ui(c, 0xffffff));
-
-struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
-struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
-struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+struct qreg src0 = src[0 * 4 + i];
+struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24));
+struct qreg src1 = src[1 * 4 + i];
+struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24));
+
+struct qreg hilo = qir_MUL24(c, src0_hi, src1);
+struct qreg lohi = qir_MUL24(c, src0, src1_hi);
+struct qreg lolo = qir_MUL24(c, src0, src1);
 
 return qir_ADD(c, lolo, qir_SHL(c,
 qir_ADD(c, hilo, lohi),

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Make integer multiply use 24 bits for the low parts.

2015-03-30 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 5df8bf86fe40ae95ad3888cb167ce80c710af227
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5df8bf86fe40ae95ad3888cb167ce80c710af227

Author: Eric Anholt e...@anholt.net
Date:   Sun Mar 29 21:21:10 2015 -0700

vc4: Make integer multiply use 24 bits for the low parts.

The hardware uses the low 24 bits in integer multiplies, so we can have
fewer high bits (and so probably drop them more frequently).

---

 src/gallium/drivers/vc4/vc4_program.c |   10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 56a3a96..49b9466 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -367,13 +367,13 @@ tgsi_to_qir_umul(struct vc4_compile *c,
  enum qop op, struct qreg *src, int i)
 {
 struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-  qir_uniform_ui(c, 16));
+  qir_uniform_ui(c, 24));
 struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-  qir_uniform_ui(c, 0xffff));
+  qir_uniform_ui(c, 0xffffff));
 struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-  qir_uniform_ui(c, 16));
+  qir_uniform_ui(c, 24));
 struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-  qir_uniform_ui(c, 0xffff));
+  qir_uniform_ui(c, 0xffffff));
 
 struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
 struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
@@ -381,7 +381,7 @@ tgsi_to_qir_umul(struct vc4_compile *c,
 
 return qir_ADD(c, lolo, qir_SHL(c,
 qir_ADD(c, hilo, lohi),
-qir_uniform_ui(c, 16)));
+qir_uniform_ui(c, 24)));
 }
 
 static struct qreg

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Drop integer multiplies with 0 to moves of 0.

2015-03-30 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 1dcc1ee314a6907213e2abd5337ec0bbba3bd1bf
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1dcc1ee314a6907213e2abd5337ec0bbba3bd1bf

Author: Eric Anholt e...@anholt.net
Date:   Mon Mar 30 10:44:28 2015 -0700

vc4: Drop integer multiplies with 0 to moves of 0.

This cleans up more instructions generated by uniform array indexing
multiplies.

total instructions in shared programs: 39989 -> 39961 (-0.07%)
instructions in affected programs: 896 -> 868 (-3.12%)

---

 src/gallium/drivers/vc4/vc4_opt_algebraic.c |8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c 
b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index d17669a..e40e0f3 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -248,6 +248,14 @@ qir_opt_algebraic(struct vc4_compile *c)
 }
 break;
 
+case QOP_MUL24:
+if (replace_x_0_with_0(c, inst, 0) ||
+replace_x_0_with_0(c, inst, 1)) {
+progress = true;
+break;
+}
+break;
+
 case QOP_AND:
 if (replace_x_0_with_0(c, inst, 0) ||
 replace_x_0_with_0(c, inst, 1)) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): vc4: Add a constant folding pass.

2015-03-30 Thread Eric Anholt
Module: Mesa
Branch: master
Commit: 8c5dcdbccb68b73d2856d9c1faafadc536e682e3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c5dcdbccb68b73d2856d9c1faafadc536e682e3

Author: Eric Anholt e...@anholt.net
Date:   Mon Mar 30 10:38:21 2015 -0700

vc4: Add a constant folding pass.

This cleans up some pointless operations generated by the in-driver mul24
lowering (commonly generated by making a vec4 index for a matrix in a
uniform array).

I could fill in other operations, but pretty much anything else ought to
be getting handled at the NIR level, I think.

total uniforms in shared programs: 13423 -> 13421 (-0.01%)
uniforms in affected programs: 346 -> 344 (-0.58%)

---

 src/gallium/drivers/vc4/Makefile.sources   |1 +
 src/gallium/drivers/vc4/vc4_opt_constant_folding.c |  110 
 src/gallium/drivers/vc4/vc4_qir.c  |1 +
 src/gallium/drivers/vc4/vc4_qir.h  |1 +
 4 files changed, 113 insertions(+)

diff --git a/src/gallium/drivers/vc4/Makefile.sources 
b/src/gallium/drivers/vc4/Makefile.sources
index c7254ea..ec0f25c 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -12,6 +12,7 @@ C_SOURCES := \
vc4_fence.c \
vc4_formats.c \
vc4_opt_algebraic.c \
+   vc4_opt_constant_folding.c \
vc4_opt_copy_propagation.c \
vc4_opt_cse.c \
vc4_opt_dead_code.c \
diff --git a/src/gallium/drivers/vc4/vc4_opt_constant_folding.c 
b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
new file mode 100644
index 0000000..ac9be5c
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_opt_constant_folding.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_opt_constant_folding.c
+ *
+ * Simple constant folding pass to clean up operations on only constants,
+ * which we might have generated within vc4_program.c.
+ */
+
+#include "vc4_qir.h"
+#include "util/u_math.h"
+
+static bool debug;
+
+static void
+dump_from(struct vc4_compile *c, struct qinst *inst)
+{
+if (!debug)
+return;
+
+fprintf(stderr, "optimizing: ");
+qir_dump_inst(c, inst);
+fprintf(stderr, "\n");
+}
+
+static void
+dump_to(struct vc4_compile *c, struct qinst *inst)
+{
+if (!debug)
+return;
+
+fprintf(stderr, "to: ");
+qir_dump_inst(c, inst);
+fprintf(stderr, "\n");
+}
+
+static bool
+constant_fold(struct vc4_compile *c, struct qinst *inst)
+{
+int nsrc = qir_get_op_nsrc(inst->op);
+uint32_t ui[nsrc];
+
+for (int i = 0; i < nsrc; i++) {
+struct qreg reg = inst->src[i];
+if (reg.file == QFILE_UNIF &&
+c->uniform_contents[reg.index] == QUNIFORM_CONSTANT) {
+ui[i] = c->uniform_data[reg.index];
+} else if (reg.file == QFILE_SMALL_IMM) {
+ui[i] = reg.index;
+} else {
+return false;
+}
+}
+
+uint32_t result = 0;
+switch (inst->op) {
+case QOP_SHR:
+result = ui[0] >> ui[1];
+break;
+
+default:
+return false;
+}
+
+dump_from(c, inst);
+
+inst->src[0] = qir_uniform_ui(c, result);
+for (int i = 1; i < nsrc; i++)
+inst->src[i] = c->undef;
+inst->op = QOP_MOV;
+
+dump_to(c, inst);
+return true;
+}
+
+bool
+qir_opt_constant_folding(struct vc4_compile *c)
+{
+bool progress = false;
+struct simple_node *node;
+
+foreach(node, &c->instructions) {
+struct qinst *inst = (struct qinst *)node;
+if (constant_fold(c, inst))
+progress = true;
+}
+
+return progress;
+}
diff --git