Reviewed-by: Jason Ekstrand <ja...@jlekstrand.net>

On Mon, Aug 27, 2018 at 3:54 PM Ian Romanick <i...@freedesktop.org> wrote:
> From: Ian Romanick <ian.d.roman...@intel.com> > > Funny story... a single shader was hurt for instructions, spills, fills. > That same shader was also the most helped for cycles. #GPUsAreWeird > > No changes on any other Intel platform. > > v2: Refactor selection of atomic opcode to a separate function. > Suggested by Jason. > > Haswell, Broadwell, and Skylake had similar results. (Skylake shown) > total instructions in shared programs: 14304116 -> 14304261 (<.01%) > instructions in affected programs: 12776 -> 12921 (1.13%) > helped: 19 > HURT: 1 > helped stats (abs) min: 1 max: 16 x̄: 2.32 x̃: 1 > helped stats (rel) min: 0.05% max: 7.27% x̄: 0.92% x̃: 0.55% > HURT stats (abs) min: 189 max: 189 x̄: 189.00 x̃: 189 > HURT stats (rel) min: 4.87% max: 4.87% x̄: 4.87% x̃: 4.87% > 95% mean confidence interval for instructions value: -12.83 27.33 > 95% mean confidence interval for instructions %-change: -1.57% 0.31% > Inconclusive result (value mean confidence interval includes 0). > > total cycles in shared programs: 527552861 -> 527531226 (<.01%) > cycles in affected programs: 1459195 -> 1437560 (-1.48%) > helped: 16 > HURT: 2 > helped stats (abs) min: 2 max: 21328 x̄: 1353.69 x̃: 6 > helped stats (rel) min: 0.01% max: 5.29% x̄: 0.36% x̃: 0.03% > HURT stats (abs) min: 12 max: 12 x̄: 12.00 x̃: 12 > HURT stats (rel) min: 0.03% max: 0.03% x̄: 0.03% x̃: 0.03% > 95% mean confidence interval for cycles value: -3699.81 1295.92 > 95% mean confidence interval for cycles %-change: -0.94% 0.30% > Inconclusive result (value mean confidence interval includes 0). 
> > total spills in shared programs: 8025 -> 8033 (0.10%) > spills in affected programs: 208 -> 216 (3.85%) > helped: 1 > HURT: 1 > > total fills in shared programs: 10989 -> 11040 (0.46%) > fills in affected programs: 444 -> 495 (11.49%) > helped: 1 > HURT: 1 > > Ivy Bridge > total instructions in shared programs: 11709181 -> 11709153 (<.01%) > instructions in affected programs: 3505 -> 3477 (-0.80%) > helped: 3 > HURT: 0 > helped stats (abs) min: 1 max: 23 x̄: 9.33 x̃: 4 > helped stats (rel) min: 0.11% max: 1.16% x̄: 0.63% x̃: 0.61% > > total cycles in shared programs: 254741126 -> 254738801 (<.01%) > cycles in affected programs: 919067 -> 916742 (-0.25%) > helped: 3 > HURT: 0 > helped stats (abs) min: 21 max: 2144 x̄: 775.00 x̃: 160 > helped stats (rel) min: 0.03% max: 0.90% x̄: 0.32% x̃: 0.03% > > total spills in shared programs: 4536 -> 4533 (-0.07%) > spills in affected programs: 40 -> 37 (-7.50%) > helped: 1 > HURT: 0 > > total fills in shared programs: 4819 -> 4813 (-0.12%) > fills in affected programs: 94 -> 88 (-6.38%) > helped: 1 > HURT: 0 > > Signed-off-by: Ian Romanick <ian.d.roman...@intel.com> > Reviewed-by: Caio Marcelo de Oliveira Filho <caio.olive...@intel.com> [v1] > --- > src/intel/compiler/brw_fs_nir.cpp | 27 +++++++++++++++++++++++---- > 1 file changed, 23 insertions(+), 4 deletions(-) > > diff --git a/src/intel/compiler/brw_fs_nir.cpp > b/src/intel/compiler/brw_fs_nir.cpp > index 9c9df5ac09f..67c39a661ec 100644 > --- a/src/intel/compiler/brw_fs_nir.cpp > +++ b/src/intel/compiler/brw_fs_nir.cpp > @@ -3604,6 +3604,21 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder > &bld, > } > } > > +static int > +get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src) > +{ > + const nir_const_value *const val = > nir_src_as_const_value(instr->src[src]); > + > + if (val != NULL) { > + if (val->i32[0] == 1) > + return BRW_AOP_INC; > + else if (val->i32[0] == -1) > + return BRW_AOP_DEC; > + } > + > + return BRW_AOP_ADD; > +} > + > void > 
fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, > nir_intrinsic_instr *instr) > @@ -3660,7 +3675,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder > &bld, > } > > case nir_intrinsic_shared_atomic_add: > - nir_emit_shared_atomic(bld, BRW_AOP_ADD, instr); > + nir_emit_shared_atomic(bld, get_op_for_atomic_add(instr, 1), instr); > break; > case nir_intrinsic_shared_atomic_imin: > nir_emit_shared_atomic(bld, BRW_AOP_IMIN, instr); > @@ -4378,7 +4393,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder > &bld, nir_intrinsic_instr *instr > } > > case nir_intrinsic_ssbo_atomic_add: > - nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr); > + nir_emit_ssbo_atomic(bld, get_op_for_atomic_add(instr, 2), instr); > break; > case nir_intrinsic_ssbo_atomic_imin: > nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr); > @@ -4888,7 +4903,9 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder > &bld, > } > > fs_reg offset = get_nir_src(instr->src[1]); > - fs_reg data1 = get_nir_src(instr->src[2]); > + fs_reg data1; > + if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) > + data1 = get_nir_src(instr->src[2]); > fs_reg data2; > if (op == BRW_AOP_CMPWR) > data2 = get_nir_src(instr->src[3]); > @@ -4962,7 +4979,9 @@ fs_visitor::nir_emit_shared_atomic(const fs_builder > &bld, > > fs_reg surface = brw_imm_ud(GEN7_BTI_SLM); > fs_reg offset; > - fs_reg data1 = get_nir_src(instr->src[1]); > + fs_reg data1; > + if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC) > + data1 = get_nir_src(instr->src[1]); > fs_reg data2; > if (op == BRW_AOP_CMPWR) > data2 = get_nir_src(instr->src[2]); > -- > 2.14.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev