Since the SF/SBE stage is only capable of performing arbitrary reorderings of 16 varying slots, we can't arrange the fragment shader inputs in an arbitrary order if there are more than 16 input varying slots in use. We need to make sure that slots 16-31 match the corresponding outputs of the previous pipeline stage.
The easiest way to accomplish this is to just make all varying slots match up with the previous pipeline stage. --- src/mesa/drivers/dri/i965/brw_fs.cpp | 42 ++++++++++++++++++++++++++++++------ src/mesa/drivers/dri/i965/brw_wm.c | 3 ++- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 7950d5f6..8d73a0f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -1237,11 +1237,40 @@ fs_visitor::calculate_urb_setup() int urb_next = 0; /* Figure out where each of the incoming setup attributes lands. */ if (brw->gen >= 6) { - for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { - if (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK & - BITFIELD64_BIT(i)) { - c->prog_data.urb_setup[i] = urb_next++; - } + if (_mesa_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) <= 16) { + /* The SF/SBE pipeline stage can do arbitrary rearrangement of the + * first 16 varying inputs, so we can put them wherever we want. + * Just put them in order. + */ + for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { + if (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK & + BITFIELD64_BIT(i)) { + c->prog_data.urb_setup[i] = urb_next++; + } + } + } else { + /* We have enough input varyings that the SF/SBE pipeline stage can't + * arbitrarily rearrange them to suit our whim; we have to put them + * in an order that matches the output of the previous pipeline stage + * (geometry or vertex shader). + */ + struct brw_vue_map prev_stage_vue_map; + brw_compute_vue_map(brw, &prev_stage_vue_map, + c->key.input_slots_valid); + int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET; + assert(prev_stage_vue_map.num_slots <= first_slot + 32); + for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; + slot++) { + int varying = prev_stage_vue_map.slot_to_varying[slot]; + if (varying != BRW_VARYING_SLOT_COUNT && + (fp->Base.InputsRead & BRW_FS_VARYING_INPUT_MASK & + BITFIELD64_BIT(varying))) { + c->prog_data.urb_setup[varying] = slot - first_slot; + urb_next = MAX2(urb_next, slot + 1); + } + } + urb_next = prev_stage_vue_map.num_slots - first_slot; } } else { /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */ @@ -3149,7 +3178,8 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog) key.iz_lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; } - if (brw->gen < 6) + if (brw->gen < 6 || _mesa_bitcount_64(fp->Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) key.input_slots_valid = fp->Base.InputsRead | VARYING_BIT_POS; key.clamp_fragment_color = ctx->API == API_OPENGL_COMPAT; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 3df2b7d..3e59880 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -466,7 +466,8 @@ static void brw_wm_populate_key( struct brw_context *brw, (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); /* BRW_NEW_VUE_MAP_GEOM_OUT */ - if (brw->gen < 6) + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) key->input_slots_valid = brw->vue_map_geom_out.slots_valid; /* The unique fragment program ID */ -- 1.8.4 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev