On 05/12/2018 18:32, Connor Abbott wrote: > Given that other places call nir_eval_const_opcode(), and they'll be > broken unless they also flush denorms, it's probably a good idea to > move all this into nir_eval_const_opcode() itself. >
Thanks for the feedback. I will do this and fix the rest of the things you mentioned here. Sam > On Wed, Dec 5, 2018 at 4:56 PM Samuel Iglesias Gonsálvez > <sigles...@igalia.com> wrote: >> >> Signed-off-by: Samuel Iglesias Gonsálvez <sigles...@igalia.com> >> --- >> src/compiler/nir/nir_opt_constant_folding.c | 74 +++++++++++++++++++-- >> 1 file changed, 68 insertions(+), 6 deletions(-) >> >> diff --git a/src/compiler/nir/nir_opt_constant_folding.c >> b/src/compiler/nir/nir_opt_constant_folding.c >> index 1fca530af24..a6df8284e17 100644 >> --- a/src/compiler/nir/nir_opt_constant_folding.c >> +++ b/src/compiler/nir/nir_opt_constant_folding.c >> @@ -39,7 +39,7 @@ struct constant_fold_state { >> }; >> >> static bool >> -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) >> +constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx, unsigned >> execution_mode) >> { >> nir_const_value src[NIR_MAX_VEC_COMPONENTS]; >> >> @@ -77,12 +77,39 @@ constant_fold_alu_instr(nir_alu_instr *instr, void >> *mem_ctx) >> switch(load_const->def.bit_size) { >> case 64: >> src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]]; >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 && >> + (nir_op_infos[instr->op].input_types[i] == nir_type_float || >> + nir_op_infos[instr->op].input_types[i] == >> nir_type_float64)) { >> + if (src[i].u64[j] < 0x0010000000000000) >> + src[i].u64[j] = 0; >> + if (src[i].u64[j] & 0x8000000000000000 && >> + !(src[i].u64[j] & 0x7ff0000000000000)) >> + src[i].u64[j] = 0x8000000000000000; >> + } > > Given that this code is duplicated for inputs and outputs in this > patch, maybe refactor to a shared helper? 
> >> break; >> case 32: >> src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]]; >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 && >> + (nir_op_infos[instr->op].input_types[i] == nir_type_float || >> + nir_op_infos[instr->op].input_types[i] == >> nir_type_float32)) { >> + if (src[i].u32[j] < 0x00800000) >> + src[i].u32[j] = 0; >> + if (src[i].u32[j] & 0x80000000 && >> + !(src[i].u32[j] & 0x7f800000)) >> + src[i].u32[j] = 0x80000000; >> + } >> break; >> case 16: >> src[i].u16[j] = load_const->value.u16[instr->src[i].swizzle[j]]; >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 && >> + (nir_op_infos[instr->op].input_types[i] == nir_type_float || >> + nir_op_infos[instr->op].input_types[i] == >> nir_type_float16)) { >> + if (src[i].u16[j] < 0x0400) >> + src[i].u16[j] = 0; >> + if (src[i].u16[j] & 0x8000 && >> + !(src[i].u16[j] & 0x7c00)) >> + src[i].u16[j] = 0x8000; >> + } >> break; >> case 8: >> src[i].u8[j] = load_const->value.u8[instr->src[i].swizzle[j]]; >> @@ -106,6 +133,40 @@ constant_fold_alu_instr(nir_alu_instr *instr, void >> *mem_ctx) >> nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, >> bit_size, src); >> >> + for (unsigned j = 0; j < instr->dest.dest.ssa.num_components; j++) { >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP64 && >> + bit_size == 64 && >> + (nir_op_infos[instr->op].output_type == nir_type_float || >> + nir_op_infos[instr->op].output_type == nir_type_float64)) { > > The bit_size doesn't have to equal the destination bitsize, it's the > bitsize for inputs and outputs which are unsized (e.g. output_type == > nir_type_float instead of nir_type_float32). This should be > (output_type == nir_type_float && bit_size == 64) || output_type == > nir_type_float64, and that goes for the other bitsizes too. 
> >> + if (dest.u64[j] < 0x0010000000000000) >> + dest.u64[j] = 0; >> + if (dest.u64[j] & 0x8000000000000000 && >> + !(dest.u64[j] & 0x7ff0000000000000)) >> + dest.u64[j] = 0x8000000000000000; >> + } >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP32 && >> + bit_size == 32 && >> + (nir_op_infos[instr->op].output_type == nir_type_float || >> + nir_op_infos[instr->op].output_type == nir_type_float32)) { >> + if (dest.u32[j] < 0x00800000) >> + dest.u32[j] = 0; >> + if (dest.u32[j] & 0x80000000 && >> + !(dest.u32[j] & 0x7f800000)) >> + dest.u32[j] = 0x80000000; >> + } >> + >> + if (execution_mode & SHADER_DENORM_FLUSH_TO_ZERO_FP16 && >> + bit_size == 16 && >> + (nir_op_infos[instr->op].output_type == nir_type_float || >> + nir_op_infos[instr->op].output_type == nir_type_float16)) { >> + if (dest.u16[j] < 0x0400) >> + dest.u16[j] = 0; >> + if (dest.u16[j] & 0x8000 && >> + !(dest.u16[j] & 0x7c00)) >> + dest.u16[j] = 0x8000; >> + } >> + } >> + >> nir_load_const_instr *new_instr = >> nir_load_const_instr_create(mem_ctx, >> instr->dest.dest.ssa.num_components, >> @@ -157,14 +218,14 @@ constant_fold_intrinsic_instr(nir_intrinsic_instr >> *instr) >> } >> >> static bool >> -constant_fold_block(nir_block *block, void *mem_ctx) >> +constant_fold_block(nir_block *block, void *mem_ctx, unsigned >> execution_mode) >> { >> bool progress = false; >> >> nir_foreach_instr_safe(instr, block) { >> switch (instr->type) { >> case nir_instr_type_alu: >> - progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), >> mem_ctx); >> + progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), >> mem_ctx, execution_mode); >> break; >> case nir_instr_type_intrinsic: >> progress |= >> @@ -180,13 +241,13 @@ constant_fold_block(nir_block *block, void *mem_ctx) >> } >> >> static bool >> -nir_opt_constant_folding_impl(nir_function_impl *impl) >> +nir_opt_constant_folding_impl(nir_function_impl *impl, unsigned >> execution_mode) >> { >> void *mem_ctx = ralloc_parent(impl); >> bool progress 
= false; >> >> nir_foreach_block(block, impl) { >> - progress |= constant_fold_block(block, mem_ctx); >> + progress |= constant_fold_block(block, mem_ctx, execution_mode); >> } >> >> if (progress) >> @@ -200,10 +261,11 @@ bool >> nir_opt_constant_folding(nir_shader *shader) >> { >> bool progress = false; >> + unsigned execution_mode = >> shader->info.shader_float_controls_execution_mode; >> >> nir_foreach_function(function, shader) { >> if (function->impl) >> - progress |= nir_opt_constant_folding_impl(function->impl); >> + progress |= nir_opt_constant_folding_impl(function->impl, >> execution_mode); >> } >> >> return progress; >> -- >> 2.19.1 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
signature.asc
Description: OpenPGP digital signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev