Am 01.02.2018 um 09:21 schrieb Dave Airlie:
> From: Dave Airlie <airl...@redhat.com>
> 
> This passes the CTS and piglit tests.
> 
> This also disables sb for shaders that use helper invocations until
> sb no longer messes up the VPM flags.
> 
> Thanks to Ilia and Glenn for advice, and Roland for working
> out the working evergreen path.
> ---
>  src/gallium/drivers/r600/r600_asm.c    |   7 +-
>  src/gallium/drivers/r600/r600_isa.c    |   1 +
>  src/gallium/drivers/r600/r600_isa.h    |   5 +-
>  src/gallium/drivers/r600/r600_shader.c | 113 +++++++++++++++++++++++++++++++++
>  src/gallium/drivers/r600/r600_shader.h |   1 +
>  src/gallium/drivers/r600/r600_sq.h     |   2 +
>  6 files changed, 126 insertions(+), 3 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_asm.c 
> b/src/gallium/drivers/r600/r600_asm.c
> index 21d069d..ec2d34e 100644
> --- a/src/gallium/drivers/r600/r600_asm.c
> +++ b/src/gallium/drivers/r600/r600_asm.c
> @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>                               fprintf(stderr, "%04d %08X %08X  %s ", id, 
> bc->bytecode[id],
>                                               bc->bytecode[id + 1], 
> cfop->name);
>                               fprintf(stderr, "%d @%d ", cf->ndw / 4, 
> cf->addr);
> -                             fprintf(stderr, "\n");
> +                             if (cf->vpm)
> +                                     fprintf(stderr, "VPM ");
>                               if (cf->end_of_program)
>                                       fprintf(stderr, "EOP ");
> +                             fprintf(stderr, "\n");
> +
>                       } else if (cfop->flags & CF_EXP) {
>                               int o = 0;
>                               const char *exp_type[] = {"PIXEL", "POS  ", 
> "PARAM"};
> @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>                                       fprintf(stderr, "POP:%X ", 
> cf->pop_count);
>                               if (cf->count && (cfop->flags & CF_EMIT))
>                                       fprintf(stderr, "STREAM%d ", cf->count);
> +                             if (cf->vpm)
> +                                     fprintf(stderr, "VPM ");
>                               if (cf->end_of_program)
>                                       fprintf(stderr, "EOP ");
>                               fprintf(stderr, "\n");
> diff --git a/src/gallium/drivers/r600/r600_isa.c 
> b/src/gallium/drivers/r600/r600_isa.c
> index 2633cdc..611b370 100644
> --- a/src/gallium/drivers/r600/r600_isa.c
> +++ b/src/gallium/drivers/r600/r600_isa.c
> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
>               {"ALU_EXT",                       {   -1,   -1, 0x0C, 0x0C },  
> CF_CLAUSE | CF_ALU | CF_ALU_EXT  },
>               {"ALU_CONTINUE",                  { 0x0D, 0x0D, 0x0D,   -1 },  
> CF_CLAUSE | CF_ALU  },
>               {"ALU_BREAK",                     { 0x0E, 0x0E, 0x0E,   -1 },  
> CF_CLAUSE | CF_ALU  },
> +             {"ALU_VALID_PIXEL_MODE",          {   -1,   -1,   -1, 0x0E },  
> CF_CLAUSE | CF_ALU  },
>               {"ALU_ELSE_AFTER",                { 0x0F, 0x0F, 0x0F, 0x0F },  
> CF_CLAUSE | CF_ALU  },
>               {"CF_NATIVE",                     { 0x00, 0x00, 0x00, 0x00 },  
> 0  }
>  };
> diff --git a/src/gallium/drivers/r600/r600_isa.h 
> b/src/gallium/drivers/r600/r600_isa.h
> index f6e2697..fcaf1f7 100644
> --- a/src/gallium/drivers/r600/r600_isa.h
> +++ b/src/gallium/drivers/r600/r600_isa.h
> @@ -646,10 +646,11 @@ struct cf_op_info
>  #define CF_OP_ALU_EXT                      84
>  #define CF_OP_ALU_CONTINUE                 85
>  #define CF_OP_ALU_BREAK                    86
> -#define CF_OP_ALU_ELSE_AFTER               87
> +#define CF_OP_ALU_VALID_PIXEL_MODE         87
> +#define CF_OP_ALU_ELSE_AFTER               88
>  
>  /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
> -#define CF_NATIVE                          88
> +#define CF_NATIVE                          89
>  
>  enum r600_chip_class {
>       ISA_CC_R600,
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index a462691..9388db9 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>  
>       use_sb &= !shader->shader.uses_atomics;
>       use_sb &= !shader->shader.uses_images;
> +     use_sb &= !shader->shader.uses_helper_invocation;
>  
>       /* Check if the bytecode has already been built. */
>       if (!shader->shader.bc.bytecode) {
> @@ -346,6 +347,7 @@ struct r600_shader_ctx {
>       boolean                 clip_vertex_write;
>       unsigned                cv_output;
>       unsigned                edgeflag_output;
> +     int                                     helper_invoc_reg;
>       int                                     cs_block_size_reg;
>       int                                     cs_grid_size_reg;
>       bool cs_block_size_loaded, cs_grid_size_loaded;
> @@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx 
> *ctx, struct r600_shader_
>       return t1;
>  }
>  
> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
> +{
> +     int r;
> +     struct r600_bytecode_alu alu;
> +
> +     /* do a vtx fetch with wqm set on the vtx fetch */
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = ALU_OP1_MOV;
> +     alu.dst.sel = ctx->helper_invoc_reg;
> +     alu.dst.chan = 0;
> +     alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> +     alu.src[0].value = 0xffffffff;
> +     alu.dst.write = 1;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     /* do a vtx fetch in VPM mode */
> +     struct r600_bytecode_vtx vtx;
> +     memset(&vtx, 0, sizeof(vtx));
> +     vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
> +     vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
> +     vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
> +     vtx.src_gpr = 0;
> +     vtx.mega_fetch_count = 16; /* no idea here really... */
> +     vtx.dst_gpr = ctx->helper_invoc_reg;
> +     vtx.dst_sel_x = 4;
> +     vtx.dst_sel_y = 7;              /* SEL_Y */
> +     vtx.dst_sel_z = 7;              /* SEL_Z */
> +     vtx.dst_sel_w = 7;              /* SEL_W */
> +     vtx.data_format = FMT_32;
> +     if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
> +             return r;
> +     ctx->bc->cf_last->vpm = 1;
> +
> +     /* compare the result with 0 */
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = ALU_OP3_CNDE_INT;
> +     alu.is_op3 = 1;
> +     alu.dst.sel = ctx->helper_invoc_reg;
> +     alu.dst.chan = 0;
> +     alu.dst.write = 1;
> +     alu.src[0].sel = ctx->helper_invoc_reg;
> +     alu.src[0].chan = 0;
> +     alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
> +     alu.src[1].value = 0x0;
> +     alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
> +     alu.src[2].value = 0xffffffff;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
I only realized this later: this ALU conditional (the CNDE_INT above) is
completely unnecessary, so just skip it...

Other than that,
Reviewed-by: Roland Scheidegger <srol...@vmware.com>



> +     return 0;
> +}
> +
> +static int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
> +{
> +     int r;
> +     struct r600_bytecode_alu alu;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = ALU_OP1_MOV;
> +     alu.dst.sel = ctx->helper_invoc_reg;
> +     alu.dst.chan = 0;
> +     alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> +     alu.src[0].value = 0xffffffff;
> +     alu.dst.write = 1;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
> +     if (r)
> +             return r;
> +
> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +     alu.op = ALU_OP1_MOV;
> +     alu.dst.sel = ctx->helper_invoc_reg;
> +     alu.dst.chan = 0;
> +     alu.src[0].sel = V_SQ_ALU_SRC_0;
> +     alu.dst.write = 1;
> +     alu.last = 1;
> +     r = r600_bytecode_add_alu_type(ctx->bc, &alu, 
> CF_OP_ALU_VALID_PIXEL_MODE);
> +     if (r)
> +             return r;
> +
> +     return ctx->helper_invoc_reg;
> +}
> +
>  static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
>  {
>       struct r600_bytecode_vtx vtx;
> @@ -1458,6 +1547,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
>                       r600_src->sel = load_block_grid_size(ctx, false);
>               } else if 
> (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
> TGSI_SEMANTIC_BLOCK_SIZE) {
>                       r600_src->sel = load_block_grid_size(ctx, true);
> +             } else if 
> (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == 
> TGSI_SEMANTIC_HELPER_INVOCATION) {
> +                     r600_src->sel = ctx->helper_invoc_reg;
> +                     r600_src->swizzle[0] = 0;
> +                     r600_src->swizzle[1] = 0;
> +                     r600_src->swizzle[2] = 0;
> +                     r600_src->swizzle[3] = 0;
>               }
>       } else {
>               if (tgsi_src->Register.Indirect)
> @@ -3120,6 +3215,7 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>       tgsi_scan_shader(tokens, &ctx.info);
>       shader->indirect_files = ctx.info.indirect_files;
>  
> +     shader->uses_helper_invocation = false;
>       shader->uses_doubles = ctx.info.uses_doubles;
>       shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
>       shader->nsys_inputs = 0;
> @@ -3193,6 +3289,7 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>       ctx.clip_vertex_write = 0;
>       ctx.thread_id_gpr_loaded = false;
>  
> +     ctx.helper_invoc_reg = -1;
>       ctx.cs_block_size_reg = -1;
>       ctx.cs_grid_size_reg = -1;
>       ctx.cs_block_size_loaded = false;
> @@ -3238,6 +3335,13 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>                       ctx.file_offset[TGSI_FILE_INPUT] = 
> evergreen_gpr_count(&ctx);
>               else
>                       ctx.file_offset[TGSI_FILE_INPUT] = 
> allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
> +
> +             for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
> +                     if (ctx.info.system_value_semantic_name[i] == 
> TGSI_SEMANTIC_HELPER_INVOCATION) {
> +                             ctx.helper_invoc_reg = 
> ctx.file_offset[TGSI_FILE_INPUT]++;
> +                             shader->uses_helper_invocation = true;
> +                     }
> +             }
>       }
>       if (ctx.type == PIPE_SHADER_GEOMETRY) {
>               /* FIXME 1 would be enough in some cases (3 or less input 
> vertices) */
> @@ -3439,6 +3543,15 @@ static int r600_shader_from_tgsi(struct r600_context 
> *rctx,
>       if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
>               shader->nr_ps_max_color_exports = 8;
>  
> +     if (ctx.shader->uses_helper_invocation) {
> +             if (ctx.bc->chip_class == CAYMAN)
> +                     r = cm_load_helper_invocation(&ctx);
> +             else
> +                     r = eg_load_helper_invocation(&ctx);
> +             if (r)
> +                     return r;
> +
> +     }
>       if (ctx.fragcoord_input >= 0) {
>               if (ctx.bc->chip_class == CAYMAN) {
>                       for (j = 0 ; j < 4; j++) {
> diff --git a/src/gallium/drivers/r600/r600_shader.h 
> b/src/gallium/drivers/r600/r600_shader.h
> index 8444907..da96688 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -119,6 +119,7 @@ struct r600_shader {
>       boolean                 uses_doubles;
>       boolean                 uses_atomics;
>       boolean                 uses_images;
> +     boolean                 uses_helper_invocation;
>       uint8_t                 atomic_base;
>       uint8_t                 rat_base;
>       uint8_t                 image_size_const_offset;
> diff --git a/src/gallium/drivers/r600/r600_sq.h 
> b/src/gallium/drivers/r600/r600_sq.h
> index f51ffcf..6b07dc1 100644
> --- a/src/gallium/drivers/r600/r600_sq.h
> +++ b/src/gallium/drivers/r600/r600_sq.h
> @@ -198,6 +198,8 @@
>  #define     EG_V_SQ_ALU_SRC_LDS_DIRECT_B                             
> 0x000000E0
>  #define     EG_V_SQ_ALU_SRC_TIME_HI                                  
> 0x000000E3
>  #define     EG_V_SQ_ALU_SRC_TIME_LO                                  
> 0x000000E4
> +#define     EG_V_SQ_ALU_SRC_MASK_HI                                  
> 0x000000E5
> +#define     EG_V_SQ_ALU_SRC_MASK_LO                                  
> 0x000000E6
>  #define     EG_V_SQ_ALU_SRC_HW_WAVE_ID                               
> 0x000000E7
>  #define     EG_V_SQ_ALU_SRC_SIMD_ID                                  
> 0x000000E8
>  #define     EG_V_SQ_ALU_SRC_SE_ID                                    
> 0x000000E9
> 

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to