On 07/29/2014 02:36 AM, Petri Latvala wrote: > Add an optimization pass that drops min/max expression operands that > can be proven to not contribute to the final result. The algorithm is > similar to alpha-beta pruning on a minmax search, from the field of > AI. > > This optimization pass can optimize min/max expressions where operands > are min/max expressions. Such code can appear in shaders by itself, or > as the result of clamp() or AMD_shader_trinary_minmax functions. > > This optimization pass improves the generated code for piglit's > AMD_shader_trinary_minmax tests as follows: > > total instructions in shared programs: 75 -> 67 (-10.67%) > instructions in affected programs: 60 -> 52 (-13.33%) > GAINED: 0 > LOST: 0 > > All tests (max3, min3, mid3) improved.
And I assume no piglit regressions? Also... have you tried this in combination with Abdiel's related work on saturates? > A full shader-db run: > > total instructions in shared programs: 4293603 -> 4293575 (-0.00%) > instructions in affected programs: 1188 -> 1160 (-2.36%) > GAINED: 0 > LOST: 0 > > Improvements happen in Guacamelee and Serious Sam 3. One shader from > Dungeon Defenders is hurt by shader-db metrics (26 -> 28), because of > dropping of a (constant float (0.00000)) operand, which was > compiled to a saturate modifier. > > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=76861 > Signed-off-by: Petri Latvala <petri.latv...@intel.com> > --- > src/glsl/Makefile.sources | 1 + > src/glsl/glsl_parser_extras.cpp | 1 + > src/glsl/ir_optimization.h | 1 + > src/glsl/opt_minmax.cpp | 395 > ++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 398 insertions(+) > create mode 100644 src/glsl/opt_minmax.cpp > > diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources > index b54eae7..1ee80a3 100644 > --- a/src/glsl/Makefile.sources > +++ b/src/glsl/Makefile.sources > @@ -95,6 +95,7 @@ LIBGLSL_FILES = \ > $(GLSL_SRCDIR)/opt_flip_matrices.cpp \ > $(GLSL_SRCDIR)/opt_function_inlining.cpp \ > $(GLSL_SRCDIR)/opt_if_simplification.cpp \ > + $(GLSL_SRCDIR)/opt_minmax.cpp \ > $(GLSL_SRCDIR)/opt_noop_swizzle.cpp \ > $(GLSL_SRCDIR)/opt_rebalance_tree.cpp \ > $(GLSL_SRCDIR)/opt_redundant_jumps.cpp \ > diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp > index 890123a..9f57ef3 100644 > --- a/src/glsl/glsl_parser_extras.cpp > +++ b/src/glsl/glsl_parser_extras.cpp > @@ -1561,6 +1561,7 @@ do_common_optimization(exec_list *ir, bool linked, > else > progress = do_constant_variable_unlinked(ir) || progress; > progress = do_constant_folding(ir) || progress; > + progress = do_minmax_prune(ir) || progress; > progress = do_cse(ir) || progress; > progress = do_rebalance_tree(ir) || progress; > progress = do_algebraic(ir, native_integers, options) || progress; > diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h > index b83c225..9d22585 100644 > --- a/src/glsl/ir_optimization.h > +++ b/src/glsl/ir_optimization.h > @@ -98,6 +98,7 @@ bool opt_flatten_nested_if_blocks(exec_list *instructions); > bool do_discard_simplification(exec_list *instructions); > bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = > 0); > bool do_mat_op_to_vec(exec_list *instructions); > +bool do_minmax_prune(exec_list *instructions); > bool do_noop_swizzle(exec_list *instructions); > bool do_structure_splitting(exec_list *instructions); > bool do_swizzle_swizzle(exec_list *instructions); > diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp > new file mode 100644 > index 0000000..5656059 > --- /dev/null > +++ b/src/glsl/opt_minmax.cpp > @@ -0,0 +1,395 @@ > +/* > + * Copyright © 2014 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +/** > + * \file opt_minmax.cpp > + * > + * Drop operands from an expression tree of only min/max operations if they > + * can be proven to not contribute to the final result. > + * > + * The algorithm is similar to alpha-beta pruning on a minmax search. > + */ > + > +#include "ir.h" > +#include "ir_visitor.h" > +#include "ir_rvalue_visitor.h" > +#include "ir_optimization.h" > +#include "glsl_types.h" > +#include "main/macros.h" > + > +namespace > +{ > +class minmax_range > +{ > +public: > + > + minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) > + { > + range[0] = low; > + range[1] = high; > + } > + > + /* [0] is the lower limit of the range, [1] is the higher limit. NULL on > + * [0] means negative infinity (unlimited) and on [1] positive infinity > + * (unlimited). Because of the two interpretations of the value NULL, > + * arbitrary comparison between ir_constants is impossible. > + */ > + ir_constant *range[2]; > +}; > + > +class ir_minmax_visitor : public ir_rvalue_enter_visitor { > +public: > + ir_minmax_visitor() > + : progress(false) > + , valid(true) > + { > + } > + > + virtual ~ir_minmax_visitor() > + { > + } > + > + bool > + less_all_components(ir_constant *one, ir_constant *two); > + > + ir_constant * > + smaller_constant(ir_constant *one, ir_constant *two); > + > + ir_constant * > + larger_constant(ir_constant *one, ir_constant *two); > + > + minmax_range > + combine_range(minmax_range r0, minmax_range r1, bool ismin); > + > + minmax_range > + range_intersection(minmax_range r0, minmax_range r1); > + > + minmax_range > + get_range(ir_rvalue *rval); > + > + ir_rvalue * > + prune_expression(ir_expression *expr, minmax_range baserange); > + > + void > + handle_rvalue(ir_rvalue **rvalue); > + > + bool progress; > + bool valid; > +}; > + > +/* > + * Returns true if all vector components of `one' are less than of `two'. > + * > + * If there are vector components that are less while others are greater, the > + * visitor is marked invalid and no further changes will be made to the IR. > + */ > +bool > +ir_minmax_visitor::less_all_components(ir_constant *one, ir_constant *two) > +{ > + assert(one != NULL); > + assert(two != NULL); > + > + assert(one->type->base_type == two->type->base_type); > + > + unsigned oneinc = one->type->is_scalar() ? 0 : 1; > + unsigned twoinc = two->type->is_scalar() ? 0 : 1; > + unsigned components = MAX2(one->type->components(), > two->type->components()); > + > + /* No early escape. We need to go through all components and mark the > + * visitor as invalid if comparison yields less for some components and > + * greater for others. If some components compare as equal, it's not > + * invalid. > + */ > + > + bool foundless = false; > + bool foundgreater = false; > + bool foundequal = false; > + > + for (unsigned i = 0, c0 = 0, c1 = 0; > + i < components; > + c0 += oneinc, c1 += twoinc, ++i) { > + switch (one->type->base_type) { > + case GLSL_TYPE_UINT: > + if (one->value.u[c0] < two->value.u[c1]) > + foundless = true; > + else if (one->value.u[c0] > two->value.u[c1]) > + foundgreater = true; > + else > + foundequal = true; > + continue; Since there's nothing else in the loop, I'd rather see these be break statements. Seeing continue here is a weird and a little unnerving. > + case GLSL_TYPE_INT: > + if (one->value.i[c0] < two->value.i[c1]) > + foundless = true; > + else if (one->value.i[c0] > two->value.i[c1]) > + foundgreater = true; > + else > + foundequal = true; > + continue; > + case GLSL_TYPE_FLOAT: > + if (one->value.f[c0] < two->value.f[c1]) > + foundless = true; > + else if (one->value.f[c0] > two->value.f[c1]) > + foundgreater = true; > + else > + foundequal = true; > + continue; > + default: > + assert(!"not reached"); Use unreachable("Invalid type") here. > + } > + } > + > + if (foundless && foundgreater) { > + valid = false; > + return false; > + } > + > + if (foundequal) > + return false; > + > + return foundless; > +} > + > +ir_constant * > +ir_minmax_visitor::smaller_constant(ir_constant *one, ir_constant *two) > +{ > + assert(one != NULL); > + assert(two != NULL); > + > + if (less_all_components(one, two)) > + return one; > + else > + return two; > +} > + > +ir_constant * > +ir_minmax_visitor::larger_constant(ir_constant *one, ir_constant *two) > +{ > + assert(one != NULL); > + assert(two != NULL); > + > + if (less_all_components(one, two)) > + return two; > + else > + return one; > +} > + > +/* Combines two ranges by doing an element-wise min() / max() depending on > the > + * operation. > + */ > +minmax_range > +ir_minmax_visitor::combine_range(minmax_range r0, minmax_range r1, bool > ismin) > +{ > + minmax_range ret; > + > + if (!r0.range[0]) { > + ret.range[0] = ismin ? r0.range[0] : r1.range[0]; > + } else if (!r1.range[0]) { > + ret.range[0] = ismin ? r1.range[0] : r0.range[0]; > + } else { > + ret.range[0] = ismin ? smaller_constant(r0.range[0], r1.range[0]) : > + larger_constant(r0.range[0], r1.range[0]); > + } > + > + if (!r0.range[1]) { > + ret.range[1] = ismin ? r1.range[1] : r0.range[1]; > + } else if (!r1.range[1]) { > + ret.range[1] = ismin ? r0.range[1] : r1.range[1]; > + } else { > + ret.range[1] = ismin ? smaller_constant(r0.range[1], r1.range[1]) : > + larger_constant(r0.range[1], r1.range[1]); > + } > + > + return ret; > +} > + > +/* Returns a range so that lower limit is the larger of the two lower limits, > + * and higher limit is the smaller of the two higher limits. > + */ > +minmax_range > +ir_minmax_visitor::range_intersection(minmax_range r0, minmax_range r1) > +{ > + minmax_range ret; > + > + if (!r0.range[0]) > + ret.range[0] = r1.range[0]; > + else if (!r1.range[0]) > + ret.range[0] = r0.range[0]; > + else > + ret.range[0] = larger_constant(r0.range[0], r1.range[0]); > + > + if (!r0.range[1]) > + ret.range[1] = r1.range[1]; > + else if (!r1.range[1]) > + ret.range[1] = r0.range[1]; > + else > + ret.range[1] = smaller_constant(r0.range[1], r1.range[1]); > + > + return ret; > +} > + > +minmax_range > +ir_minmax_visitor::get_range(ir_rvalue *rval) > +{ > + if (ir_expression *expr = rval->as_expression()) { > + if (expr->operation == ir_binop_min || > + expr->operation == ir_binop_max) { > + minmax_range r0 = get_range(expr->operands[0]); > + minmax_range r1 = get_range(expr->operands[1]); > + > + return combine_range(r0, r1, expr->operation == ir_binop_min); > + } > + } > + > + if (ir_constant *c = rval->as_constant()) { > + return minmax_range(c, c); > + } > + > + return minmax_range(); > +} > + > +ir_rvalue * > +ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range > baserange) > +{ > + assert(expr->operation == ir_binop_min || > + expr->operation == ir_binop_max); > + > + if (!valid) > + return expr; > + > + bool ismin = expr->operation == ir_binop_min; > + minmax_range limits[2]; > + > + for (unsigned i = 0; i < 2; ++i) { > + limits[i] = get_range(expr->operands[i]); > + } > + > + for (unsigned i = 0; i < 2; ++i) { > + /* Operands are dropped if their range doesn't reach the given > baserange > + * or the other operand's limit from the appropriate direction. > + */ > + > + minmax_range basereach; > + minmax_range otherreach; > + > + if (ismin) { > + basereach.range[0] = limits[i].range[0]; > + otherreach.range[0] = limits[i].range[0]; > + basereach.range[1] = baserange.range[1]; > + otherreach.range[1] = limits[1 - i].range[1]; > + } else { > + basereach.range[0] = baserange.range[0]; > + otherreach.range[0] = limits[1 - i].range[0]; > + basereach.range[1] = limits[i].range[1]; > + otherreach.range[1] = limits[i].range[1]; > + } > + > + if ((basereach.range[0] && basereach.range[1] && > + !less_all_components(basereach.range[0], basereach.range[1])) || > + (otherreach.range[0] && otherreach.range[1] && > + !less_all_components(otherreach.range[0], otherreach.range[1]))) { > + /* Bail out if those comparisons made the visitor invalid. */ > + if (!valid) > + return expr; > + > + progress = true; > + > + /* Recurse if necessary. */ > + if (ir_expression* opexpr = expr->operands[1 - i]->as_expression()) > { > + if (opexpr->operation == ir_binop_min || > + opexpr->operation == ir_binop_max) { > + return prune_expression(opexpr, baserange); > + } > + } > + > + return expr->operands[1 - i]; > + } > + } > + > + /* Now recurse to operands giving them the proper baserange. The baserange > + * to pass is the intersection of our baserange and the other operand's > + * limit with one of the ranges unlimited. > + */ > + > + unsigned clear = (ismin ? 0 : 1); > + > + for (unsigned i = 0; i < 2; ++i) { > + if (ir_expression *opexpr = expr->operands[i]->as_expression()) { > + if (opexpr->operation == ir_binop_min || > + opexpr->operation == ir_binop_max) { > + limits[1 - i].range[clear] = NULL; > + minmax_range base = range_intersection(limits[1 - i], baserange); > + expr->operands[i] = prune_expression(opexpr, base); > + } > + } > + } > + > + return expr; > +} > + > +ir_rvalue * > +swizzle_if_required(ir_expression *expr, > + ir_rvalue *rval) > +{ > + if (expr->type->is_vector() && rval->type->is_scalar()) { > + return new(expr) ir_swizzle(rval, 0, 0, 0, 0, > + expr->type->vector_elements); > + } else { > + return rval; > + } > +} > + > +void > +ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) > +{ > + if (!*rvalue) > + return; > + > + ir_expression *expr = (*rvalue)->as_expression(); > + if (!expr || (expr->operation != ir_binop_min && > + expr->operation != ir_binop_max)) > + return; > + > + ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); > + if (!valid || new_rvalue == *rvalue) { > + return; > + } > + > + /* If the expression type is a vector and the optimization leaves a scalar > + * as the result, we need to turn it into a vector. > + */ > + *rvalue = swizzle_if_required(expr, new_rvalue); > + > + progress = true; > +} > + > +} > + > +bool > +do_minmax_prune(exec_list *instructions) > +{ > + ir_minmax_visitor v; > + > + visit_list_elements(&v, instructions); > + > + return v.progress; > +} > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev