On Fri, Apr 8, 2016 at 9:36 AM, Eduardo Lima Mitev <el...@igalia.com> wrote:
> On 04/08/2016 01:35 AM, Kenneth Graunke wrote: > >> This makes the extra multiply visible to NIR's algebraic optimizations >> (for constant reassociation) as well as constant folding. This means >> that when the result of sin/cos are multiplied by an constant, we can >> eliminate the extra multiply altogether, reducing the cost of the >> workaround. >> >> It also means we only have to implement it one place, rather than in >> both backends. >> >> This makes INTEL_PRECISE_TRIG=1 cost nothing on GPUTest/Volplosion, >> which has a ton of sin() calls, but always multiplies them by an >> immediate constant. The extra multiply gets folded away. >> >> > Cleaner, indeed. Patch (and series) is: > > Reviewed-by: Eduardo Lima Mitev <el...@igalia.com> > > Thanks! > > Eduardo > > > Signed-off-by: Kenneth Graunke <kenn...@whitecape.org> >> --- >> src/mesa/drivers/dri/i965/Makefile.am | 5 +++ >> src/mesa/drivers/dri/i965/Makefile.sources | 1 + >> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +------- >> src/mesa/drivers/dri/i965/brw_nir.c | 3 ++ >> src/mesa/drivers/dri/i965/brw_nir.h | 2 + >> .../drivers/dri/i965/brw_nir_trig_workarounds.py | 43 >> ++++++++++++++++++++++ >> src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 16 +------- >> 7 files changed, 58 insertions(+), 28 deletions(-) >> create mode 100755 src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py >> >> diff --git a/src/mesa/drivers/dri/i965/Makefile.am >> b/src/mesa/drivers/dri/i965/Makefile.am >> index 0db5a51..a41c830 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.am >> +++ b/src/mesa/drivers/dri/i965/Makefile.am >> @@ -33,6 +33,7 @@ AM_CFLAGS = \ >> -I$(top_srcdir)/src/mesa/drivers/dri/common \ >> -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ >> -I$(top_srcdir)/src/gtest/include \ >> + -I$(top_srcdir)/src/compiler/nir \ >> -I$(top_builddir)/src/compiler/nir \ >> -I$(top_builddir)/src/mesa/drivers/dri/common \ >> $(DEFINES) \ >> @@ -41,6 +42,10 @@ AM_CFLAGS = \ >> >> AM_CXXFLAGS = $(AM_CFLAGS) >> >> +brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py >> $(top_srcdir)/src/compiler/nir/nir_algebraic.py >> + $(MKDIR_GEN) >> + $(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) >> $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; >> false) >> + >> noinst_LTLIBRARIES = libi965_dri.la libi965_compiler.la >> libi965_dri_la_SOURCES = $(i965_FILES) >> libi965_dri_la_LIBADD = libi965_compiler.la $(INTEL_LIBS) >> diff --git a/src/mesa/drivers/dri/i965/Makefile.sources >> b/src/mesa/drivers/dri/i965/Makefile.sources >> index 4689588..2619e43 100644 >> --- a/src/mesa/drivers/dri/i965/Makefile.sources >> +++ b/src/mesa/drivers/dri/i965/Makefile.sources >> @@ -44,6 +44,7 @@ i965_compiler_FILES = \ >> brw_nir.c \ >> brw_nir_analyze_boolean_resolves.c \ >> brw_nir_attribute_workarounds.c \ >> + brw_nir_trig_workarounds.c \ >> brw_nir_opt_peephole_ffma.c \ >> brw_nir_uniforms.cpp \ >> brw_packed_float.c \ >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> index 90b8789..bd6314a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp >> @@ -775,24 +775,12 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, >> nir_alu_instr *instr) >> break; >> >> case nir_op_fsin: >> - if (!compiler->precise_trig) { >> - inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); >> - } else { >> - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); >> - inst = bld.emit(SHADER_OPCODE_SIN, tmp, op[0]); >> - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); >> - } >> + inst = bld.emit(SHADER_OPCODE_SIN, result, op[0]); >> inst->saturate = instr->dest.saturate; >> break; >> >> case nir_op_fcos: >> - if (!compiler->precise_trig) { >> - inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); >> - } else { >> - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F); >> - inst = bld.emit(SHADER_OPCODE_COS, tmp, op[0]); >> - inst = bld.MUL(result, tmp, brw_imm_f(0.99997)); >> - } >> + inst = bld.emit(SHADER_OPCODE_COS, result, op[0]); >> inst->saturate = instr->dest.saturate; >> break; >> >> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c >> b/src/mesa/drivers/dri/i965/brw_nir.c >> index 1821c0d..932979a 100644 >> --- a/src/mesa/drivers/dri/i965/brw_nir.c >> +++ b/src/mesa/drivers/dri/i965/brw_nir.c >> @@ -447,6 +447,9 @@ brw_preprocess_nir(const struct brw_compiler >> *compiler, nir_shader *nir) >> if (nir->stage == MESA_SHADER_GEOMETRY) >> OPT(nir_lower_gs_intrinsics); >> >> + if (compiler->precise_trig) >> + OPT(brw_nir_apply_trig_workarounds); >> + >> static const nir_lower_tex_options tex_options = { >> .lower_txp = ~0, >> }; >> diff --git a/src/mesa/drivers/dri/i965/brw_nir.h >> b/src/mesa/drivers/dri/i965/brw_nir.h >> index b10c083..2711606 100644 >> --- a/src/mesa/drivers/dri/i965/brw_nir.h >> +++ b/src/mesa/drivers/dri/i965/brw_nir.h >> @@ -106,6 +106,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader >> *nir, >> bool use_legacy_snorm_formula, >> const uint8_t >> *attrib_wa_flags); >> >> +bool brw_nir_apply_trig_workarounds(nir_shader *nir); >> + >> nir_shader *brw_nir_apply_sampler_key(nir_shader *nir, >> const struct brw_device_info >> *devinfo, >> const struct >> brw_sampler_prog_key_data *key, >> diff --git a/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py >> b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py >> new file mode 100755 >> index 0000000..67dab9a >> --- /dev/null >> +++ b/src/mesa/drivers/dri/i965/brw_nir_trig_workarounds.py >> @@ -0,0 +1,43 @@ >> +#! /usr/bin/env python >> +# >> +# Copyright (C) 2016 Intel Corporation >> +# >> +# Permission is hereby granted, free of charge, to any person obtaining a >> +# copy of this software and associated documentation files (the >> "Software"), >> +# to deal in the Software without restriction, including without >> limitation >> +# the rights to use, copy, modify, merge, publish, distribute, >> sublicense, >> +# and/or sell copies of the Software, and to permit persons to whom the >> +# Software is furnished to do so, subject to the following conditions: >> +# >> +# The above copyright notice and this permission notice (including the >> next >> +# paragraph) shall be included in all copies or substantial portions of >> the >> +# Software. >> +# >> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, >> EXPRESS OR >> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF >> MERCHANTABILITY, >> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT >> SHALL >> +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR >> OTHER >> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING >> +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER >> DEALINGS >> +# IN THE SOFTWARE. >> + >> +import nir_algebraic >> + >> +# The SIN and COS instructions on Intel hardware can produce values >> +# slightly outside of the [-1.0, 1.0] range for a small set of values. >> +# Obviously, this can break everyone's expectations about trig functions. >> +# >> +# According to an internal presentation, the COS instruction can produce >> +# a value up to 1.000027 for inputs in the range (0.08296, 0.09888). One >> +# suggested workaround is to multiply by 0.99997, scaling down the >> +# amplitude slightly. Apparently this also minimizes the error function, >> +# reducing the maximum error from 0.00006 to about 0.00003. >> + >> +trig_workarounds = [ >> + (('fsin', 'x'), ('fmul', ('fsin', 'x'), 0.99997)), >> + (('fcos', 'x'), ('fmul', ('fcos', 'x'), 0.99997)), >> +] >> + >> +print '#include "brw_nir.h"' >> +print nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds", >> + trig_workarounds).render() >> > Side note: Part of the intention of nir_algebraic when I wrote it over a year ago was to make adding custom algebraic transformation passes very light-weight and easy. Glad to see someone finally taking advantage of that. :-) Series Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net> > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> index d9f96c5..e4e8c38 100644 >> --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp >> @@ -1101,24 +1101,12 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr) >> break; >> >> case nir_op_fsin: >> - if (!compiler->precise_trig) { >> - inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); >> - } else { >> - src_reg tmp = src_reg(this, glsl_type::vec4_type); >> - inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]); >> - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); >> - } >> + inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]); >> inst->saturate = instr->dest.saturate; >> break; >> >> case nir_op_fcos: >> - if (!compiler->precise_trig) { >> - inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); >> - } else { >> - src_reg tmp = src_reg(this, glsl_type::vec4_type); >> - inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]); >> - inst = emit(MUL(dst, tmp, brw_imm_f(0.99997))); >> - } >> + inst = emit_math(SHADER_OPCODE_COS, dst, op[0]); >> inst->saturate = instr->dest.saturate; >> break; >> >> >> > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev