On 30/01/2019 16:18, Connor Abbott wrote: > > > On Tue, Dec 18, 2018 at 11:35 AM Samuel Iglesias Gonsálvez > <sigles...@igalia.com <mailto:sigles...@igalia.com>> wrote: > > v2: > - Refactor conditions and shared function (Connor) > - Move code to nir_eval_const_opcode() (Connor) > - Don't flush to zero on fquantize2f16 > From Vulkan spec, VK_KHR_shader_float_controls section: > > "3) Do denorm and rounding mode controls apply to OpSpecConstantOp? > > RESOLVED: Yes, except when the opcode is OpQuantizeToF16." > > Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com > <mailto:sigles...@igalia.com>> > --- > src/compiler/nir/nir_constant_expressions.h | 3 +- > src/compiler/nir/nir_constant_expressions.py | 65 ++++++++++++++++++-- > src/compiler/nir/nir_loop_analyze.c | 7 ++- > src/compiler/nir/nir_opt_constant_folding.c | 15 ++--- > src/compiler/spirv/spirv_to_nir.c | 3 +- > 5 files changed, 75 insertions(+), 18 deletions(-) > > diff --git a/src/compiler/nir/nir_constant_expressions.h > b/src/compiler/nir/nir_constant_expressions.h > index 1d6bbbc25d3..a2d416abc45 100644 > --- a/src/compiler/nir/nir_constant_expressions.h > +++ b/src/compiler/nir/nir_constant_expressions.h > @@ -31,6 +31,7 @@ > #include "nir.h" > > nir_const_value nir_eval_const_opcode(nir_op op, unsigned > num_components, > - unsigned bit_size, > nir_const_value *src); > + unsigned bit_size, > nir_const_value *src, > + unsigned > float_controls_execution_mode); > > #endif /* NIR_CONSTANT_EXPRESSIONS_H */ > diff --git a/src/compiler/nir/nir_constant_expressions.py > b/src/compiler/nir/nir_constant_expressions.py > index 505cdd8baae..dc2132df0d0 100644 > --- a/src/compiler/nir/nir_constant_expressions.py > +++ b/src/compiler/nir/nir_constant_expressions.py > @@ -66,6 +66,37 @@ template = """\ > #include "util/bigmath.h" > #include "nir_constant_expressions.h" > > +/** > + * Checks if the provided value is a denorm and flushes it to zero. > +*/ > +static nir_const_value > +constant_denorm_flush_to_zero(nir_const_value value, unsigned > index, unsigned bit_size) > +{ > + switch(bit_size) { > + case 64: > + if (value.u64[index] < 0x0010000000000000) > + value.u64[index] = 0; > + if (value.u64[index] & 0x8000000000000000 && > + !(value.u64[index] & 0x7ff0000000000000)) > + value.u64[index] = 0x8000000000000000; > + break; > + case 32: > + if (value.u32[index] < 0x00800000) > + value.u32[index] = 0; > + if (value.u32[index] & 0x80000000 && > + !(value.u32[index] & 0x7f800000)) > + value.u32[index] = 0x80000000; > + break; > + case 16: > + if (value.u16[index] < 0x0400) > + value.u16[index] = 0; > + if (value.u16[index] & 0x8000 && > + !(value.u16[index] & 0x7c00)) > + value.u16[index] = 0x8000; > + } > + return value; > +} > + > /** > * Evaluate one component of packSnorm4x8. > */ > @@ -260,7 +291,7 @@ struct ${type}${width}_vec { > % endfor > % endfor > > -<%def name="evaluate_op(op, bit_size)"> > +<%def name="evaluate_op(op, bit_size, execution_mode)"> > <% > output_type = type_add_size(op.output_type, bit_size) > input_types = [type_add_size(type_, bit_size) for type_ in > op.input_types] > @@ -277,6 +308,14 @@ struct ${type}${width}_vec { > <% continue %> > %endif > > + % for k in range(op.input_sizes[j]): > + % if op.name <http://op.name> != "fquantize2f16" and > bit_size > 8 and op.input_types[j] == "float": > > > This condition doesn't seem right. It may happen to work, but it isn't > following NIR principles on ALU instructions. Each NIR opcode has an > output type (given by op.output_type) and input types for each source > (given by op.input_types). Each type may be sized, like "float32", or > unsized like "float". All unsized inputs/outputs must have the same bit > size at runtime. The bit_size here is the common bitsize for > inputs/outputs with unsized types like "float" in the opcode definition. > Even though in general it's only known at runtime, we're switching over > all bitsizes in the generated code, so here we do know what it is at > compile time. In order to handle sized types, we have to drop all > references to bit_size and stop comparing op.input_types[j] directly > with "float" since it may be a sized type. Also, if this source has a > float type, we already know that its bit size is at least 16, so the > second check should be useless. I think what you actually want to do is > extract the base type, i.e. something like: op.name <http://op.name> != > "fquantize16" and type_base_name(input_types[j]) == "float" >
Right. Thanks for the explanation. > > + if (execution_mode & > SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { > > > bit_size here isn't the right bit size for sized sources. You want > ${type_size(input_types[i])} which is the actual size of the source at > runtime. This will be bit_size if the op.input_types[j] is unsized or > the input size otherwise. > > + _src[${j}] = > constant_denorm_flush_to_zero(_src[${j}], ${k}, bit_size); > > > same as above with bit_size. > OK, I will do them for the dst ones. > Finally, I think you missed the equivalent hunk for non-per-component > sources. > Actually, I realized that this is not mandatory for the sources, just the destination, so I will remove the source part all together and do the suggested fixes into the destination part. > > + } > + % endif > + % endfor > + > const struct ${input_types[j]}_vec src${j} = { > % for k in range(op.input_sizes[j]): > % if input_types[j] == "int1": > @@ -343,6 +382,12 @@ struct ${type}${width}_vec { > % else: > _dst_val.${get_const_field(output_type)}[_i] = dst; > % endif > + > + % if op.name <http://op.name> != "fquantize2f16" and > bit_size > 8 and op.output_type == "float": > + if (execution_mode & > SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { > + _dst_val = constant_denorm_flush_to_zero(_dst_val, > _i, bit_size); > + } > + % endif > > } > % else: > ## In the non-per-component case, create a struct dst with > @@ -375,6 +420,12 @@ struct ${type}${width}_vec { > % else: > _dst_val.${get_const_field(output_type)}[${k}] = > dst.${"xyzw"[k]}; > % endif > + > + % if op.name <http://op.name> != "fquantize2f16" and > bit_size > 8 and op.output_type == "float": > + if (execution_mode & > SHADER_DENORM_FLUSH_TO_ZERO_FP${bit_size}) { > + _dst_val = constant_denorm_flush_to_zero(_dst_val, > ${k}, bit_size); > + } > + % endif > % endfor > % endif > </%def> > @@ -383,7 +434,8 @@ struct ${type}${width}_vec { > static nir_const_value > evaluate_${name}(MAYBE_UNUSED unsigned num_components, > ${"UNUSED" if op_bit_sizes(op) is None else ""} > unsigned bit_size, > - MAYBE_UNUSED nir_const_value *_src) > + MAYBE_UNUSED nir_const_value *_src, > + MAYBE_UNUSED unsigned execution_mode) > { > nir_const_value _dst_val = { {0, } }; > > @@ -391,7 +443,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned > num_components, > switch (bit_size) { > % for bit_size in op_bit_sizes(op): > case ${bit_size}: { > - ${evaluate_op(op, bit_size)} > + ${evaluate_op(op, bit_size, execution_mode)} > break; > } > % endfor > @@ -400,7 +452,7 @@ evaluate_${name}(MAYBE_UNUSED unsigned > num_components, > unreachable("unknown bit width"); > } > % else: > - ${evaluate_op(op, 0)} > + ${evaluate_op(op, 0, execution_mode)} > % endif > > return _dst_val; > @@ -409,12 +461,13 @@ evaluate_${name}(MAYBE_UNUSED unsigned > num_components, > > nir_const_value > nir_eval_const_opcode(nir_op op, unsigned num_components, > - unsigned bit_width, nir_const_value *src) > + unsigned bit_width, nir_const_value *src, > + unsigned float_controls_execution_mode) > { > switch (op) { > % for name in sorted(opcodes.keys()): > case nir_op_${name}: > - return evaluate_${name}(num_components, bit_width, src); > + return evaluate_${name}(num_components, bit_width, src, > float_controls_execution_mode); > % endfor > default: > unreachable("shouldn't get here"); > diff --git a/src/compiler/nir/nir_loop_analyze.c > b/src/compiler/nir/nir_loop_analyze.c > index 259f02a854e..c9fba8649db 100644 > --- a/src/compiler/nir/nir_loop_analyze.c > +++ b/src/compiler/nir/nir_loop_analyze.c > @@ -497,19 +497,20 @@ test_iterations(int32_t iter_int, > nir_const_value *step, > */ > nir_const_value mul_src[2] = { iter_src, *step }; > nir_const_value mul_result = > - nir_eval_const_opcode(mul_op, 1, bit_size, mul_src); > + nir_eval_const_opcode(mul_op, 1, bit_size, mul_src, > SHADER_DEFAULT_FLOAT_CONTROL_MODE); > > > Shouldn't we be using the actual float control mode for the shader? I > don't know much about this code, but presumably it's trying to simulate > something calculated by the shader. > Good catch! I'll fix it. Sam > > > /* Add the initial value to the accumulated induction variable > total */ > nir_const_value add_src[2] = { mul_result, *initial }; > nir_const_value add_result = > - nir_eval_const_opcode(add_op, 1, bit_size, add_src); > + nir_eval_const_opcode(add_op, 1, bit_size, add_src, > SHADER_DEFAULT_FLOAT_CONTROL_MODE); > > nir_const_value src[2] = { { {0, } }, { {0, } } }; > src[limit_rhs ? 0 : 1] = add_result; > src[limit_rhs ? 1 : 0] = *limit; > > /* Evaluate the loop exit condition */ > - nir_const_value result = nir_eval_const_opcode(cond_op, 1, > bit_size, src); > + nir_const_value result = nir_eval_const_opcode(cond_op, 1, > bit_size, src, > + > SHADER_DEFAULT_FLOAT_CONTROL_MODE); > > return invert_cond ? (result.u32[0] == 0) : (result.u32[0] != 0); > } > diff --git a/src/compiler/nir/nir_opt_constant_folding.c > b/src/compiler/nir/nir_opt_constant_folding.c > index 5097a3bcc36..bd6130d5b33 100644 > --- a/src/compiler/nir/nir_opt_constant_folding.c > +++ b/src/compiler/nir/nir_opt_constant_folding.c > @@ -39,7 +39,7 @@ struct constant_fold_state { > }; > > static bool > -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) > +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, > unsigned execution_mode) > { > nir_const_value src[NIR_MAX_VEC_COMPONENTS]; > > @@ -108,7 +108,7 @@ constant_fold_alu_instr(nir_alu_instr *instr, > void *mem_ctx) > > nir_const_value dest = > nir_eval_const_opcode(instr->op, > instr->dest.dest.ssa.num_components, > - bit_size, src); > + bit_size, src, execution_mode); > > nir_load_const_instr *new_instr = > nir_load_const_instr_create(mem_ctx, > @@ -161,14 +161,14 @@ > constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) > } > > static bool > -constant_fold_block(nir_block *block, void *mem_ctx) > +constant_fold_block(nir_block *block, void *mem_ctx, unsigned > execution_mode) > { > bool progress = false; > > nir_foreach_instr_safe(instr, block) { > switch (instr->type) { > case nir_instr_type_alu: > - progress |= > constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx); > + progress |= > constant_fold_alu_instr(nir_instr_as_alu(instr), mem_ctx, > execution_mode); > break; > case nir_instr_type_intrinsic: > progress |= > @@ -184,13 +184,13 @@ constant_fold_block(nir_block *block, void > *mem_ctx) > } > > static bool > -nir_opt_constant_folding_impl(nir_function_impl *impl) > +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned > execution_mode) > { > void *mem_ctx = ralloc_parent(impl); > bool progress = false; > > nir_foreach_block(block, impl) { > - progress |= constant_fold_block(block, mem_ctx); > + progress |= constant_fold_block(block, mem_ctx, execution_mode); > } > > if (progress) > @@ -204,10 +204,11 @@ bool > nir_opt_constant_folding(nir_shader *shader) > { > bool progress = false; > + unsigned execution_mode = > shader->info.shader_float_controls_execution_mode; > > nir_foreach_function(function, shader) { > if (function->impl) > - progress |= nir_opt_constant_folding_impl(function->impl); > + progress |= nir_opt_constant_folding_impl(function->impl, > execution_mode); > } > > return progress; > diff --git a/src/compiler/spirv/spirv_to_nir.c > b/src/compiler/spirv/spirv_to_nir.c > index 96d4d80970f..7578a83e2bb 100644 > --- a/src/compiler/spirv/spirv_to_nir.c > +++ b/src/compiler/spirv/spirv_to_nir.c > @@ -1842,7 +1842,8 @@ vtn_handle_constant(struct vtn_builder *b, > SpvOp opcode, > } > > val->constant->values[0] = > - nir_eval_const_opcode(op, num_components, bit_size, src); > + nir_eval_const_opcode(op, num_components, bit_size, src, > + > b->shader->info.shader_float_controls_execution_mode); > break; > } /* default */ > } > -- > 2.19.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org <mailto:mesa-dev@lists.freedesktop.org> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev