Module: Mesa Branch: main Commit: 05703a49f9e8b536c2de966ef531710b00d9aea2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=05703a49f9e8b536c2de966ef531710b00d9aea2
Author: Ian Romanick <[email protected]> Date: Fri Jan 14 15:37:50 2022 -0800 glsl: Use csel in do_vec_index_to_cond_assign This matches what NIR does. See nir_vector_extract. This improves code generation for several reasons. First, it only requires 3 comparisons instead of 4 (vec3(i > 0, i > 1, i > 2) vs vec4(i == 0, i == 1, i == 2, i == 3)). Secondly, it shortens the liverange of some values, possibly quite dramatically. Consider a loop in the old version (after lowering if-statements to selects): loop { ... x = csel(i == 0, a[0], x); x = csel(i == 1, a[1], x); x = csel(i == 2, a[2], x); x = csel(i == 3, a[3], x); ... } x is live for the whole loop across iterations. In the new version, x is only live while its value is needed: loop { ... t0 = csel(i > 0 , a[1], a[0]); t1 = csel(i > 2 , a[3], a[2]); x = csel(i > 1, t1, t0); ... } Outside a loop, this also means more values of the array may have their liveness reduced sooner (by consuming two values at once). All Intel platforms had similar results. (Tigerlake shown) total instructions in shared programs: 21171336 -> 21163615 (-0.04%) instructions in affected programs: 89680 -> 81959 (-8.61%) helped: 40 HURT: 4 helped stats (abs) min: 1 max: 450 x̄: 193.68 x̃: 196 helped stats (rel) min: 0.41% max: 13.32% x̄: 6.01% x̃: 6.22% HURT stats (abs) min: 1 max: 12 x̄: 6.50 x̃: 6 HURT stats (rel) min: 0.50% max: 0.66% x̄: 0.58% x̃: 0.58% 95% mean confidence interval for instructions value: -229.68 -121.28 95% mean confidence interval for instructions %-change: -6.93% -3.89% Instructions are helped. 
total cycles in shared programs: 832879641 -> 829513122 (-0.40%) cycles in affected programs: 44738430 -> 41371911 (-7.52%) helped: 35 HURT: 2 helped stats (abs) min: 2 max: 189948 x̄: 96186.49 x̃: 116154 helped stats (rel) min: 0.37% max: 11.08% x̄: 5.88% x̃: 6.47% HURT stats (abs) min: 4 max: 4 x̄: 4.00 x̃: 4 HURT stats (rel) min: 0.69% max: 0.69% x̄: 0.69% x̃: 0.69% 95% mean confidence interval for cycles value: -112881.94 -69092.06 95% mean confidence interval for cycles %-change: -6.77% -4.27% Cycles are helped. total spills in shared programs: 8061 -> 7338 (-8.97%) spills in affected programs: 873 -> 150 (-82.82%) helped: 24 HURT: 0 total fills in shared programs: 7501 -> 6388 (-14.84%) fills in affected programs: 1389 -> 276 (-80.13%) helped: 24 HURT: 0 Radeon R430 total instructions in shared programs: 2449852 -> 2449136 (-0.03%) instructions in affected programs: 6285 -> 5569 (-11.39%) helped: 64 HURT: 0 helped stats (abs) min: 4 max: 12 x̄: 11.19 x̃: 12 helped stats (rel) min: 8.16% max: 21.62% x̄: 12.09% x̃: 10.91% total consts in shared programs: 1032517 -> 1032482 (<.01%) consts in affected programs: 966 -> 931 (-3.62%) helped: 35 HURT: 0 helped stats (abs) min: 1 max: 1 x̄: 1.00 x̃: 1 helped stats (rel) min: 2.94% max: 10.00% x̄: 4.26% x̃: 3.57% Reviewed-by: Matt Turner <[email protected]> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14573> --- .../glsl/lower_vec_index_to_cond_assign.cpp | 52 +++++++++++++++++++--- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp index 89244266602..70ce16c3ec1 100644 --- a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -41,6 +41,7 @@ #include "ir_optimization.h" #include "compiler/glsl_types.h" #include "ir_builder.h" +#include "program/prog_instruction.h" using namespace ir_builder; @@ -105,14 +106,53 @@ 
ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ ir_variable *const var = body.make_temp(type, "vec_index_tmp_v"); /* Generate a single comparison condition "mask" for all of the components - * in the vector. + * in the vector. This will be of the form vec3(i > 0, i > 1, i > 2). */ - ir_variable *const cond = - compare_index_block(body, index, 0, orig_vector->type->vector_elements); + ir_rvalue *const broadcast_index = orig_vector->type->vector_elements > 2 + ? swizzle(index, SWIZZLE_XXXX, orig_vector->type->vector_elements - 1) + : operand(index).val; - /* Generate a conditional move of each vector element to the temp. */ - for (unsigned i = 0; i < orig_vector->type->vector_elements; i++) - body.emit(assign(var, swizzle(value, i, 1), swizzle(cond, i, 1))); + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = 0; + test_indices_data.i[1] = 1; + test_indices_data.i[2] = 2; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, &test_indices_data); + + ir_rvalue *const condition_val = greater(broadcast_index, test_indices); + + ir_variable *const cond = body.make_temp(condition_val->type, + "dereference_condition"); + + body.emit(assign(cond, condition_val)); + + + /* Generate a series of conditional selections to pick the right element. */ + assert(orig_vector->type->vector_elements <= 4 && + orig_vector->type->vector_elements >= 2); + + ir_rvalue *rhs = csel(swizzle(cond, 0, 1), + swizzle(value, 1, 1), + swizzle(value, 0, 1)); + + if (orig_vector->type->vector_elements > 2) { + ir_rvalue *tmp; + + if (orig_vector->type->vector_elements > 3) { + tmp = csel(swizzle(cond, 2, 1), + swizzle(value, 3, 1), + swizzle(value, 2, 1)); + + } else { + tmp = swizzle(value, 2, 1); + } + + rhs = csel(swizzle(cond, 1, 1), tmp, rhs); + } + + body.emit(assign(var, rhs)); /* Put all of the new instructions in the IR stream before the old * instruction.
