On Thu, Apr 14, 2016 at 2:32 PM, Kenneth Graunke <kenn...@whitecape.org> wrote:
> On Tuesday, March 22, 2016 3:33:39 PM PDT Jason Ekstrand wrote: > > While we're at it, we also add support for the possibility that the > > indirect is, in fact, a constant. This shouldn't happen in the common > case > > (if it does, that means NIR failed to constant-fold something), but it's > > possible so we should handle it. > > --- > > src/mesa/drivers/dri/i965/brw_fs.cpp | 4 ++ > > src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 67 ++++++++++++++++++++ > +----- > > 2 files changed, 58 insertions(+), 13 deletions(-) > > > > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/ > i965/brw_fs.cpp > > index d41c8a8..91487c9 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp > > @@ -4481,6 +4481,10 @@ get_lowered_simd_width(const struct > brw_device_info > *devinfo, > > case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: > > return 8; > > > > + case SHADER_OPCODE_MOV_INDIRECT: > > + /* Prior to Broadwell, we only have 8 address subregisters */ > > + return devinfo->gen < 8 ? 8 : inst->exec_size; > > + > > default: > > return inst->exec_size; > > } > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/ > drivers/dri/i965/brw_fs_generator.cpp > > index 35400cb..4e89a8a 100644 > > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp > > @@ -351,22 +351,63 @@ fs_generator::generate_mov_indirect(fs_inst *inst, > > > > unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr; > > > > - /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */ > > - struct brw_reg addr = vec8(brw_address_reg(0)); > > + if (indirect_byte_offset.file == BRW_IMMEDIATE_VALUE) { > > + imm_byte_offset += indirect_byte_offset.ud; > > > > - /* The destination stride of an instruction (in bytes) must be > greater > > - * than or equal to the size of the rest of the instruction. Since > the > > - * address register is of type UW, we can't use a D-type instruction. > > - * In order to get around this, re re-type to UW and use a stride. > > - */ > > - indirect_byte_offset = > > - retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); > > + reg.nr = imm_byte_offset / REG_SIZE; > > + reg.subnr = imm_byte_offset % REG_SIZE; > > + brw_MOV(p, dst, reg); > > + } else { > > + /* Prior to Broadwell, there are only 8 address registers. */ > > + assert(inst->exec_size == 8 || devinfo->gen >= 8); > > + > > + /* We use VxH indirect addressing, clobbering a0.0 through a0.7. > */ > > + struct brw_reg addr = vec8(brw_address_reg(0)); > > + > > + /* The destination stride of an instruction (in bytes) must be > greater > > + * than or equal to the size of the rest of the instruction. > Since > the > > + * address register is of type UW, we can't use a D-type > instruction. > > + * In order to get around this, re re-type to UW and use a stride. > > + */ > > + indirect_byte_offset = > > + retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW); > > + > > + struct brw_reg ind_src; > > + if (devinfo->gen < 8) { > > + /* Prior to broadwell, we have a restriction that the bottom 5 > bits > > + * of the base offset and the bottom 5 bits of the indirect > must > add > > + * to less than 32. In other words, the hardware needs to be > able > to > > + * add the bottom five bits of the two to get the subnumber and > add > > + * the next 7 bits of each to get the actual register number. > Since > > + * the indirect may cause us to cross a register boundary, this > makes > > + * it almost useless. We could try and do something clever > where > we > > + * use a actual base offset if base_offset % 32 == 0 but that > would > > + * mean we were generating different code depending on the base > > + * offset. Instead, for the sake of consistency, we'll just do > the > > + * add ourselves. > > + */ > > + brw_ADD(p, addr, indirect_byte_offset, > brw_imm_uw(imm_byte_offset)); > > + ind_src = brw_VxH_indirect(0, 0); > > + } else { > > + brw_MOV(p, addr, indirect_byte_offset); > > + ind_src = brw_VxH_indirect(0, imm_byte_offset); > > + } > > > > - /* Prior to Broadwell, there are only 8 address registers. */ > > - assert(inst->exec_size == 8 || devinfo->gen >= 8); > > + brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type)); > > > > - brw_MOV(p, addr, indirect_byte_offset); > > - brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), > dst.type)); > > + if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && > > + !inst->get_next()->is_head_sentinel() && > > + ((fs_inst *)inst->get_next())->mlen > 0) { > > Don't you mean: > > if (devinfo->gen == 6 && dst.file == BRW_MESSAGE_REGISTER_FILE && > !inst->get_prev()->is_head_sentinel() && > ((fs_inst *) inst->get_prev())->mlen > 0) { > > Seems like you're walking forward while facing backward... > I think I want is_tail_sentinel() but I do want to look at the next instruction > > > + /* From the Sandybridge PRM: > > + * > > + * "[Errata: DevSNB(SNB)] If MRF register is updated by any > > + * instruction that “indexed/indirect” source AND is > followed > by a > > + * send, the instruction requires a “Switch”. This is to > avoid > > + * race condition where send may dispatch before MRF is > updated." > > + */ > > + brw_inst_set_thread_control(devinfo, mov, BRW_THREAD_SWITCH); > > + } > > + } > > } > > > > void >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev