Given the fact that we have multiple possible uses for such an opcode,
I've been wondering if it wouldn't be better to simply have a
SHADER_OPCODE_INDIRECT_MOV opcode that works on pretty much any
register type.  Given that they all get lowered away to HW_REG before
the end, the emit code wouldn't have to do anything special.  This
could simply be an INDIRECT_MOV with an ATTR source while my uniform
opcode would use a UNIFORM source.  If we did this, we would have to
have the immediate "range" argument be in bytes, but that's not a huge
deal.

On Sat, Nov 7, 2015 at 9:03 PM, Kenneth Graunke <kenn...@whitecape.org> wrote:
> The geometry and tessellation control shader stages both read from
> multiple URB entries (one per vertex).  The thread payload contains
> several URB handles which reference these separate memory segments.
>
> In GLSL, these inputs are represented as per-vertex arrays; the
> outermost array index selects which vertex's inputs to read.  This
> array index does not necessarily need to be constant.
>
> To handle that, we need to use indirect addressing on GRFs to select
> which of the thread payload registers has the appropriate URB handle.
> (This is before we can even think about applying the pull model!)
>
> This patch introduces a new opcode which performs a MOV from a
> source using VxH indirect addressing (which allows each of the 8
> SIMD channels to select distinct data.)  It also marks a whole
> segment of the payload as "used", so the register allocator recognizes
> the read and avoids reusing those registers.
>
> Signed-off-by: Kenneth Graunke <kenn...@whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_defines.h           | 11 ++++++++
>  src/mesa/drivers/dri/i965/brw_fs.h                |  4 +++
>  src/mesa/drivers/dri/i965/brw_fs_cse.cpp          |  1 +
>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp    | 32 
> +++++++++++++++++++++++
>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 10 +++++++
>  src/mesa/drivers/dri/i965/brw_shader.cpp          |  2 ++
>  6 files changed, 60 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
> b/src/mesa/drivers/dri/i965/brw_defines.h
> index 6433cff..288d8b2 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -1264,6 +1264,17 @@ enum opcode {
>      * Calculate the high 32-bits of a 32x32 multiply.
>      */
>     SHADER_OPCODE_MULH,
> +
> +   /**
> +    * A SIMD8 VxH indirect addressed MOV from the thread payload.
> +    *
> +    * This can be used to select GS or TCS input URB handles.
> +    *
> +    * Source 0: Immediate offset in bytes (UD immediate).
> +    * Source 1: Indirect offset in bytes (UD GRF).
> +    * Source 2: Number of registers that could be indirectly addressed.
> +    */
> +   SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV,
>  };
>
>  enum brw_urb_write_flags {
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
> b/src/mesa/drivers/dri/i965/brw_fs.h
> index 8a93b56..fb70f0c 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -526,6 +526,10 @@ private:
>                                   struct brw_reg offset,
>                                   struct brw_reg value);
>
> +   void generate_indirect_thread_payload_mov(struct brw_reg dst,
> +                                             struct brw_reg imm_byte_offset,
> +                                             struct brw_reg 
> indirect_byte_offset);
> +
>     bool patch_discard_jumps_to_fb_writes();
>
>     const struct brw_compiler *compiler;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> index 3a28c8d..699baab 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
> @@ -78,6 +78,7 @@ is_expression(const fs_visitor *v, const fs_inst *const 
> inst)
>     case FS_OPCODE_LINTERP:
>     case SHADER_OPCODE_FIND_LIVE_CHANNEL:
>     case SHADER_OPCODE_BROADCAST:
> +   case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
>        return true;
>     case SHADER_OPCODE_RCP:
>     case SHADER_OPCODE_RSQ:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index e207a77..7d51c0e 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -368,6 +368,33 @@ fs_generator::generate_fb_write(fs_inst *inst, struct 
> brw_reg payload)
>  }
>
>  void
> +fs_generator::generate_indirect_thread_payload_mov(struct brw_reg dst,
> +                                                   struct brw_reg 
> imm_byte_offset_reg,
> +                                                   struct brw_reg 
> indirect_byte_offset)
> +{
> +   assert(imm_byte_offset_reg.type == BRW_REGISTER_TYPE_UD);
> +   assert(imm_byte_offset_reg.file == BRW_IMMEDIATE_VALUE);
> +   assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
> +   assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
> +   unsigned imm_byte_offset = imm_byte_offset_reg.dw1.ud;
> +
> +   /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
> +   struct brw_reg addr = vec8(brw_address_reg(0));
> +
> +   /* The destination stride of an instruction (in bytes) must be greater
> +    * than or equal to the size of the rest of the instruction.  Since the
> +    * address register is of type UW, we can't use a D-type instruction.
> +    * In order to get around this, we re-type to UW and use a stride.
> +    */
> +   indirect_byte_offset =
> +      retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
> +
> +   brw_MOV(p, addr, indirect_byte_offset);
> +   brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
> +   brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
> +}
> +
> +void
>  fs_generator::generate_urb_read(fs_inst *inst,
>                                  struct brw_reg dst,
>                                  struct brw_reg header)
> @@ -2085,6 +2112,11 @@ fs_generator::generate_code(const cfg_t *cfg, int 
> dispatch_width)
>           fill_count++;
>          break;
>
> +      case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
> +         assert(inst->exec_size == 8);
> +         generate_indirect_thread_payload_mov(dst, src[0], src[1]);
> +         break;
> +
>        case SHADER_OPCODE_URB_READ_SIMD8:
>           generate_urb_read(inst, dst, src[0]);
>           break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> index 9251d95..648a0f8 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
> @@ -389,6 +389,16 @@ void fs_visitor::calculate_payload_ranges(int 
> payload_node_count,
>        case CS_OPCODE_CS_TERMINATE:
>           payload_last_use_ip[0] = use_ip;
>           break;
> +      case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV: {
> +         assert(inst->src[0].file == IMM && inst->src[2].file == IMM);
> +         int first_reg = inst->src[0].fixed_hw_reg.dw1.ud / REG_SIZE;
> +         int num_regs = inst->src[2].fixed_hw_reg.dw1.ud;
> +
> +         for (int i = 0; i < num_regs; i++) {
> +            payload_last_use_ip[first_reg + i] = use_ip;
> +         }
> +         break;
> +      }
>
>        default:
>           if (inst->eot) {
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
> b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 4ea297a..5e407e9 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -546,6 +546,8 @@ brw_instruction_name(enum opcode op)
>        return "barrier";
>     case SHADER_OPCODE_MULH:
>        return "mulh";
> +   case SHADER_OPCODE_INDIRECT_THREAD_PAYLOAD_MOV:
> +      return "indirect_thread_payload_mov";
>     }
>
>     unreachable("not reached");
> --
> 2.6.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to