On Thu, Mar 5, 2015 at 9:39 PM, Jason Ekstrand <ja...@jlekstrand.net> wrote:
> This looks fine to me. I just kicked off a build on our test farm and, > assuming that looks good (I'll send another e-mail in the morning if it > does), > > Reviewed-by: Jason Ekstrand <jason.ekstr...@intel.com> > Jenkins results look god so feel free to apply the R-B above and push it. Don't worry about the shader-db number given that, as ken pointed out, shader-db is kind of useless for this. I wish we knew how many SIMD16 programs this gave us in practice, but short of doing lots of shader-db work, we can't know at the moment so don't worry about it. --Jason > > I ran shader-db on the change and I was kind of surprised to see that it > doesn't really do anything. > > GAINED: shaders/dolphin/smg.1.shader_test FS SIMD16 > > total instructions in shared programs: 5769629 -> 5769629 (0.00%) > instructions in affected programs: 0 -> 0 > helped: 0 > HURT: 0 > GAINED: 1 > LOST: 0 > > Perhaps shader-db doesn't account for some other GL state required for > dual-source because I doubt only one shader uses it. Ken? > > --Jason > > On Thu, Mar 5, 2015 at 3:21 AM, Iago Toral Quiroga <ito...@igalia.com> > wrote: > >> From the SNB PRM, volume 4, part 1, page 193: >> >> "The dual source render target messages only have SIMD8 forms due to >> maximum message length limitations. SIMD16 pixel shaders must send two of >> these messages to cover all of the pixels. Each message contains two >> colors >> (4 channels each) for each pixel in the message payload." >> >> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831 >> --- >> I sent this patch for review some months ago, but it was bad timing >> because >> it was when Jason was doing a large rewrite of the visitor code handling >> FB writes, so the patch became immediately obsolete. This is the >> up-to-date >> version. >> >> If anyone wants to test this, I sent this patch to piglit with a test that >> can be used to check for correct SIMD16 implementation specifically: >> http://lists.freedesktop.org/archives/piglit/2015-March/015015.html >> >> src/mesa/drivers/dri/i965/brw_eu.h | 1 + >> src/mesa/drivers/dri/i965/brw_eu_emit.c | 3 +- >> src/mesa/drivers/dri/i965/brw_fs.h | 6 +- >> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 ++++- >> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 77 >> +++++++++++++++++++++----- >> 5 files changed, 83 insertions(+), 19 deletions(-) >> >> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h >> b/src/mesa/drivers/dri/i965/brw_eu.h >> index 736c54b..d9ad5bd 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu.h >> +++ b/src/mesa/drivers/dri/i965/brw_eu.h >> @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p, >> unsigned msg_length, >> unsigned response_length, >> bool eot, >> + bool last_render_target, >> bool header_present); >> >> void brw_SAMPLE(struct brw_compile *p, >> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> index 1d6fd67..74cf138 100644 >> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c >> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c >> @@ -2292,6 +2292,7 @@ void brw_fb_WRITE(struct brw_compile *p, >> unsigned msg_length, >> unsigned response_length, >> bool eot, >> + bool last_render_target, >> bool header_present) >> { >> struct brw_context *brw = p->brw; >> @@ -2333,7 +2334,7 @@ void brw_fb_WRITE(struct brw_compile *p, >> msg_type, >> msg_length, >> header_present, >> - eot, /* last render target write */ >> + last_render_target, >> response_length, >> eot, >> 0 /* send_commit_msg */); >> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h >> b/src/mesa/drivers/dri/i965/brw_fs.h >> index 70098d8..5a4f66c 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs.h >> +++ b/src/mesa/drivers/dri/i965/brw_fs.h >> @@ -369,10 +369,12 @@ public: >> bool optimize_frontfacing_ternary(nir_alu_instr *instr, >> const fs_reg &result); >> >> - int setup_color_payload(fs_reg *dst, fs_reg color, unsigned >> components); >> + int setup_color_payload(fs_reg *dst, fs_reg color, unsigned >> components, >> + bool use_2nd_half); >> void emit_alpha_test(); >> fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2, >> - fs_reg src0_alpha, unsigned components); >> + fs_reg src0_alpha, unsigned components, >> + bool use_2nd_half = false); >> void emit_fb_writes(); >> void emit_urb_writes(); >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> index cbe6191..db70df5 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp >> @@ -217,9 +217,12 @@ fs_generator::fire_fb_write(fs_inst *inst, >> >> if (inst->opcode == FS_OPCODE_REP_FB_WRITE) >> msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; >> - else if (prog_data->dual_src_blend) >> - msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; >> - else if (dispatch_width == 16) >> + else if (prog_data->dual_src_blend) { >> + if (dispatch_width == 8 || !inst->eot) >> + msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; >> + else >> + msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; >> + } else if (dispatch_width == 16) >> msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; >> else >> msg_control = >> BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; >> @@ -227,6 +230,10 @@ fs_generator::fire_fb_write(fs_inst *inst, >> uint32_t surf_index = >> prog_data->binding_table.render_target_start + inst->target; >> >> + bool last_render_target = inst->eot || >> + (prog_data->dual_src_blend && >> dispatch_width == 16); >> + >> + >> brw_fb_WRITE(p, >> dispatch_width, >> payload, >> @@ -236,6 +243,7 @@ fs_generator::fire_fb_write(fs_inst *inst, >> nr, >> 0, >> inst->eot, >> + last_render_target, >> inst->header_present); >> >> brw_mark_surface_used(&prog_data->base, surf_index); >> @@ -373,6 +381,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) >> inst->mlen, >> 0, >> true, >> + true, >> inst->header_present); >> } >> >> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> index 57c4d66..d3b8345 100644 >> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp >> @@ -3366,7 +3366,8 @@ fs_visitor::emit_interpolation_setup_gen6() >> } >> >> int >> -fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned >> components) >> +fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned >> components, >> + bool use_2nd_half) >> { >> brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; >> fs_inst *inst; >> @@ -3384,7 +3385,7 @@ fs_visitor::setup_color_payload(fs_reg *dst, fs_reg >> color, unsigned components) >> colors_enabled = (1 << components) - 1; >> } >> >> - if (dispatch_width == 8 || brw->gen >= 6) { >> + if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) { >> /* SIMD8 write looks like: >> * m + 0: r0 >> * m + 1: r1 >> @@ -3415,6 +3416,33 @@ fs_visitor::setup_color_payload(fs_reg *dst, >> fs_reg color, unsigned components) >> len++; >> } >> return len; >> + } else if (brw->gen >= 6 && do_dual_src) { >> + /* SIMD16 dual source blending for gen6+. >> + * >> + * From the SNB PRM, volume 4, part 1, page 193: >> + * >> + * "The dual source render target messages only have SIMD8 forms >> due to >> + * maximum message length limitations. SIMD16 pixel shaders must >> send two >> + * of these messages to cover all of the pixels. Each message >> contains >> + * two colors (4 channels each) for each pixel in the message >> payload." >> + * >> + * So in SIMD16 dual source blending we will send 2 SIMD8 messages, >> + * each one will call this function twice (one for each color >> involved), >> + * so in each pass we only write 4 registers. Notice that the >> second >> + * SIMD8 message needs to read color data from the 2nd half of the >> color >> + * registers, so it needs to call this with use_2nd_half = true. >> + */ >> + for (unsigned i = 0; i < 4; ++i) { >> + if (colors_enabled & (1 << i)) { >> + dst[i] = fs_reg(GRF, alloc.allocate(1), color.type); >> + inst = emit(MOV(dst[i], half(offset(color, i), >> + use_2nd_half ? 1 : 0))); >> + inst->saturate = key->clamp_fragment_color; >> + if (use_2nd_half) >> + inst->force_sechalf = true; >> + } >> + } >> + return 4; >> } else { >> /* pre-gen6 SIMD16 single source DP write looks like: >> * m + 0: r0 >> @@ -3498,7 +3526,8 @@ fs_visitor::emit_alpha_test() >> >> fs_inst * >> fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1, >> - fs_reg src0_alpha, unsigned components) >> + fs_reg src0_alpha, unsigned components, >> + bool use_2nd_half) >> { >> assert(stage == MESA_SHADER_FRAGMENT); >> brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; >> @@ -3558,7 +3587,8 @@ fs_visitor::emit_single_fb_write(fs_reg color0, >> fs_reg color1, >> * alpha out the pipeline to our null renderbuffer to support >> * alpha-testing, alpha-to-coverage, and so on. >> */ >> - length += setup_color_payload(sources + length, this->outputs[0], >> 0); >> + length += setup_color_payload(sources + length, this->outputs[0], >> 0, >> + false); >> } else if (color1.file == BAD_FILE) { >> if (src0_alpha.file != BAD_FILE) { >> sources[length] = fs_reg(GRF, alloc.allocate(reg_size), >> @@ -3568,10 +3598,13 @@ fs_visitor::emit_single_fb_write(fs_reg color0, >> fs_reg color1, >> length++; >> } >> >> - length += setup_color_payload(sources + length, color0, >> components); >> + length += setup_color_payload(sources + length, color0, components, >> + false); >> } else { >> - length += setup_color_payload(sources + length, color0, >> components); >> - length += setup_color_payload(sources + length, color1, >> components); >> + length += setup_color_payload(sources + length, color0, components, >> + use_2nd_half); >> + length += setup_color_payload(sources + length, color1, components, >> + use_2nd_half); >> } >> >> if (source_depth_to_render_target) { >> @@ -3640,12 +3673,6 @@ fs_visitor::emit_fb_writes() >> brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; >> brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; >> >> - if (do_dual_src) { >> - no16("GL_ARB_blend_func_extended not yet supported in SIMD16."); >> - if (dispatch_width == 16) >> - do_dual_src = false; >> - } >> - >> fs_inst *inst; >> if (do_dual_src) { >> if (INTEL_DEBUG & DEBUG_SHADER_TIME) >> @@ -3656,6 +3683,30 @@ fs_visitor::emit_fb_writes() >> inst = emit_single_fb_write(this->outputs[0], >> this->dual_src_output, >> reg_undef, 4); >> inst->target = 0; >> + >> + /* SIMD16 dual source blending requires to send two SIMD8 dual >> source >> + * messages, where each message contains color data for 8 pixels. >> Color >> + * data for the first group of pixels is stored in the "lower" >> half of >> + * the color registers, so in SIMD16, the previous message did: >> + * m + 0: r0 >> + * m + 1: g0 >> + * m + 2: b0 >> + * m + 3: a0 >> + * >> + * Here goes the second message, which packs color data for the >> + * remaining 8 pixels. Color data for these pixels is stored in the >> + * "upper" half of the color registers, so we need to do: >> + * m + 0: r1 >> + * m + 1: g1 >> + * m + 2: b1 >> + * m + 3: a1 >> + */ >> + if (dispatch_width == 16) { >> + inst = emit_single_fb_write(this->outputs[0], >> this->dual_src_output, >> + reg_undef, 4, true); >> + inst->target = 0; >> + } >> + >> prog_data->dual_src_blend = true; >> } else if (key->nr_color_regions > 0) { >> for (int target = 0; target < key->nr_color_regions; target++) { >> -- >> 1.9.1 >> >> _______________________________________________ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> > >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev