Re: [Mesa-dev] [PATCH] radeonsi: add GS multiple streams support (v2)

Marek Olšák Thu, 30 Jul 2015 00:56:07 -0700

Reviewed-by: Marek Olšák <marek.ol...@amd.com>

Marek


On Thu, Jul 30, 2015 at 2:06 AM, Dave Airlie <airl...@gmail.com> wrote:
> From: Dave Airlie <airl...@redhat.com>
>
> This is the final piece for ARB_gpu_shader5.
>
> The code is based on the r600 code from Glenn Kennard
> and myself.
>
> While developing this, I was not 100% sure of all the calculations
> made in the GS registers; this is why max_stream is worked
> out there and used to limit the changes in registers. Without
> that limit, my initial attempts regressed either the GS texelFetch
> tests or primitive-id-restart. The current code has no regressions
> in piglit.
>
> This commit doesn't enable ARB_gpu_shader5, since that just
> bumps the GLSL level to 4.00, so I'll do a separate patch
> for 4.10.
>
> v1.1: fix bug introduced in rebase.
> v2: Address Marek's review comments,
> remove my llvm stream code for simpler C,
> move gsvs_ring and gs_next_vertex to arrays.
>
> Signed-off-by: Dave Airlie <airl...@redhat.com>
> ---
>  src/gallium/drivers/radeonsi/si_descriptors.c   |  4 +-
>  src/gallium/drivers/radeonsi/si_pipe.c          |  2 +-
>  src/gallium/drivers/radeonsi/si_shader.c        | 74 +++++++++++++++++++-----
>  src/gallium/drivers/radeonsi/si_state.c         |  4 --
>  src/gallium/drivers/radeonsi/si_state.h         |  7 ++-
>  src/gallium/drivers/radeonsi/si_state_shaders.c | 75 
> +++++++++++++++++++------
>  6 files changed, 127 insertions(+), 39 deletions(-)
>
> diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
> b/src/gallium/drivers/radeonsi/si_descriptors.c
> index 2e2a35b..14bb6e1 100644
> --- a/src/gallium/drivers/radeonsi/si_descriptors.c
> +++ b/src/gallium/drivers/radeonsi/si_descriptors.c
> @@ -724,7 +724,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint 
> shader, uint slot,
>                         struct pipe_resource *buffer,
>                         unsigned stride, unsigned num_records,
>                         bool add_tid, bool swizzle,
> -                       unsigned element_size, unsigned index_stride)
> +                       unsigned element_size, unsigned index_stride, 
> uint64_t offset)
>  {
>         struct si_context *sctx = (struct si_context *)ctx;
>         struct si_buffer_resources *buffers = &sctx->rw_buffers[shader];
> @@ -741,7 +741,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint 
> shader, uint slot,
>         if (buffer) {
>                 uint64_t va;
>
> -               va = r600_resource(buffer)->gpu_address;
> +               va = r600_resource(buffer)->gpu_address + offset;
>
>                 switch (element_size) {
>                 default:
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
> b/src/gallium/drivers/radeonsi/si_pipe.c
> index 808b9bc..a120282 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -316,7 +316,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum 
> pipe_cap param)
>         case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:
>                 return 4095;
>         case PIPE_CAP_MAX_VERTEX_STREAMS:
> -               return 1;
> +               return 4;
>
>         case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:
>                 return 2048;
> diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
> b/src/gallium/drivers/radeonsi/si_shader.c
> index fa31f73..d8bab87 100644
> --- a/src/gallium/drivers/radeonsi/si_shader.c
> +++ b/src/gallium/drivers/radeonsi/si_shader.c
> @@ -31,6 +31,7 @@
>  #include "gallivm/lp_bld_intr.h"
>  #include "gallivm/lp_bld_logic.h"
>  #include "gallivm/lp_bld_arit.h"
> +#include "gallivm/lp_bld_bitarit.h"
>  #include "gallivm/lp_bld_flow.h"
>  #include "radeon/r600_cs.h"
>  #include "radeon/radeon_llvm.h"
> @@ -87,8 +88,8 @@ struct si_shader_context
>         LLVMValueRef samplers[SI_NUM_SAMPLER_STATES];
>         LLVMValueRef so_buffers[4];
>         LLVMValueRef esgs_ring;
> -       LLVMValueRef gsvs_ring;
> -       LLVMValueRef gs_next_vertex;
> +       LLVMValueRef gsvs_ring[4];
> +       LLVMValueRef gs_next_vertex[4];
>  };
>
>  static struct si_shader_context * si_shader_context(
> @@ -1576,6 +1577,9 @@ static void si_llvm_emit_streamout(struct 
> si_shader_context *shader,
>         LLVMValueRef can_emit =
>                 LLVMBuildICmp(builder, LLVMIntULT, tid, so_vtx_count, "");
>
> +       LLVMValueRef stream_id =
> +               unpack_param(shader, shader->param_streamout_config, 24, 2);
> +
>         /* Emit the streamout code conditionally. This actually avoids
>          * out-of-bounds buffer access. The hw tells us via the SGPR
>          * (so_vtx_count) which threads are allowed to emit streamout data. */
> @@ -1615,7 +1619,9 @@ static void si_llvm_emit_streamout(struct 
> si_shader_context *shader,
>                         unsigned reg = so->output[i].register_index;
>                         unsigned start = so->output[i].start_component;
>                         unsigned num_comps = so->output[i].num_components;
> +                       unsigned stream = so->output[i].stream;
>                         LLVMValueRef out[4];
> +                       struct lp_build_if_state if_ctx_stream;
>
>                         assert(num_comps && num_comps <= 4);
>                         if (!num_comps || num_comps > 4)
> @@ -1649,11 +1655,18 @@ static void si_llvm_emit_streamout(struct 
> si_shader_context *shader,
>                                 break;
>                         }
>
> +                       LLVMValueRef can_emit_stream =
> +                               LLVMBuildICmp(builder, LLVMIntEQ,
> +                                             stream_id,
> +                                             lp_build_const_int32(gallivm, 
> stream), "");
> +
> +                       lp_build_if(&if_ctx_stream, gallivm, can_emit_stream);
>                         build_tbuffer_store_dwords(shader, 
> shader->so_buffers[buf_idx],
>                                                    vdata, num_comps,
>                                                    so_write_offset[buf_idx],
>                                                    LLVMConstInt(i32, 0, 0),
>                                                    
> so->output[i].dst_offset*4);
> +                       lp_build_endif(&if_ctx_stream);
>                 }
>         }
>         lp_build_endif(&if_ctx);
> @@ -3188,6 +3201,19 @@ static void build_interp_intrinsic(const struct 
> lp_build_tgsi_action *action,
>         }
>  }
>
> +static unsigned si_llvm_get_stream(struct lp_build_tgsi_context *bld_base,
> +                                      struct lp_build_emit_data *emit_data)
> +{
> +       LLVMValueRef (*imms)[4] = lp_soa_context(bld_base)->immediates;
> +       struct tgsi_src_register src0 = emit_data->inst->Src[0].Register;
> +       unsigned stream;
> +
> +       assert(src0.File == TGSI_FILE_IMMEDIATE);
> +
> +       stream = LLVMConstIntGetZExtValue(imms[src0.Index][src0.SwizzleX]) & 
> 0x3;
> +       return stream;
> +}
> +
>  /* Emit one vertex from the geometry shader */
>  static void si_llvm_emit_vertex(
>         const struct lp_build_tgsi_action *action,
> @@ -3207,9 +3233,14 @@ static void si_llvm_emit_vertex(
>         LLVMValueRef args[2];
>         unsigned chan;
>         int i;
> +       unsigned stream;
> +
> +       stream = si_llvm_get_stream(bld_base, emit_data);
>
>         /* Write vertex attribute values to GSVS ring */
> -       gs_next_vertex = LLVMBuildLoad(gallivm->builder, 
> si_shader_ctx->gs_next_vertex, "");
> +       gs_next_vertex = LLVMBuildLoad(gallivm->builder,
> +                                      si_shader_ctx->gs_next_vertex[stream],
> +                                      "");
>
>         /* If this thread has already emitted the declared maximum number of
>          * vertices, kill it: excessive vertex emissions are not supposed to
> @@ -3222,6 +3253,7 @@ static void si_llvm_emit_vertex(
>         kill = lp_build_select(&bld_base->base, can_emit,
>                                lp_build_const_float(gallivm, 1.0f),
>                                lp_build_const_float(gallivm, -1.0f));
> +
>         build_intrinsic(gallivm->builder, "llvm.AMDGPU.kill",
>                         LLVMVoidTypeInContext(gallivm->context), &kill, 1, 0);
>
> @@ -3241,7 +3273,7 @@ static void si_llvm_emit_vertex(
>                         out_val = LLVMBuildBitCast(gallivm->builder, out_val, 
> i32, "");
>
>                         build_tbuffer_store(si_shader_ctx,
> -                                           si_shader_ctx->gsvs_ring,
> +                                           si_shader_ctx->gsvs_ring[stream],
>                                             out_val, 1,
>                                             voffset, soffset, 0,
>                                             V_008F0C_BUF_DATA_FORMAT_32,
> @@ -3251,10 +3283,11 @@ static void si_llvm_emit_vertex(
>         }
>         gs_next_vertex = lp_build_add(uint, gs_next_vertex,
>                                       lp_build_const_int32(gallivm, 1));
> -       LLVMBuildStore(gallivm->builder, gs_next_vertex, 
> si_shader_ctx->gs_next_vertex);
> +
> +       LLVMBuildStore(gallivm->builder, gs_next_vertex, 
> si_shader_ctx->gs_next_vertex[stream]);
>
>         /* Signal vertex emission */
> -       args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | 
> SENDMSG_GS);
> +       args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_EMIT | 
> SENDMSG_GS | (stream << 8));
>         args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, 
> SI_PARAM_GS_WAVE_ID);
>         build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
>                         LLVMVoidTypeInContext(gallivm->context), args, 2,
> @@ -3270,9 +3303,11 @@ static void si_llvm_emit_primitive(
>         struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
>         struct gallivm_state *gallivm = bld_base->base.gallivm;
>         LLVMValueRef args[2];
> +       unsigned stream;
>
>         /* Signal primitive cut */
> -       args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | 
> SENDMSG_GS);
> +       stream = si_llvm_get_stream(bld_base, emit_data);
> +       args[0] = lp_build_const_int32(gallivm, SENDMSG_GS_OP_CUT | 
> SENDMSG_GS | (stream << 8));
>         args[1] = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, 
> SI_PARAM_GS_WAVE_ID);
>         build_intrinsic(gallivm->builder, "llvm.SI.sendmsg",
>                         LLVMVoidTypeInContext(gallivm->context), args, 2,
> @@ -3651,13 +3686,21 @@ static void preload_ring_buffers(struct 
> si_shader_context *si_shader_ctx)
>                         build_indexed_load_const(si_shader_ctx, buf_ptr, 
> offset);
>         }
>
> -       if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY ||
> -           si_shader_ctx->shader->is_gs_copy_shader) {
> +       if (si_shader_ctx->shader->is_gs_copy_shader) {
>                 LLVMValueRef offset = lp_build_const_int32(gallivm, 
> SI_RING_GSVS);
>
> -               si_shader_ctx->gsvs_ring =
> +               si_shader_ctx->gsvs_ring[0] =
>                         build_indexed_load_const(si_shader_ctx, buf_ptr, 
> offset);
>         }
> +       if (si_shader_ctx->type == TGSI_PROCESSOR_GEOMETRY) {
> +               int i;
> +               for (i = 0; i < 4; i++) {
> +                       LLVMValueRef offset = lp_build_const_int32(gallivm, 
> SI_RING_GSVS + i);
> +
> +                       si_shader_ctx->gsvs_ring[i] =
> +                               build_indexed_load_const(si_shader_ctx, 
> buf_ptr, offset);
> +               }
> +       }
>  }
>
>  void si_shader_binary_read_config(const struct si_screen *sscreen,
> @@ -3838,7 +3881,7 @@ static int si_generate_gs_copy_shader(struct si_screen 
> *sscreen,
>         preload_streamout_buffers(si_shader_ctx);
>         preload_ring_buffers(si_shader_ctx);
>
> -       args[0] = si_shader_ctx->gsvs_ring;
> +       args[0] = si_shader_ctx->gsvs_ring[0];
>         args[1] = lp_build_mul_imm(uint,
>                                    
> LLVMGetParam(si_shader_ctx->radeon_bld.main_fn,
>                                                 
> si_shader_ctx->param_vertex_id),
> @@ -4076,9 +4119,12 @@ int si_shader_create(struct si_screen *sscreen, 
> LLVMTargetMachineRef tm,
>         preload_ring_buffers(&si_shader_ctx);
>
>         if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
> -               si_shader_ctx.gs_next_vertex =
> -                       lp_build_alloca(bld_base->base.gallivm,
> -                                       bld_base->uint_bld.elem_type, "");
> +               int i;
> +               for (i = 0; i < 4; i++) {
> +                       si_shader_ctx.gs_next_vertex[i] =
> +                               lp_build_alloca(bld_base->base.gallivm,
> +                                               bld_base->uint_bld.elem_type, 
> "");
> +               }
>         }
>
>         if (!lp_build_tgsi_llvm(bld_base, tokens)) {
> diff --git a/src/gallium/drivers/radeonsi/si_state.c 
> b/src/gallium/drivers/radeonsi/si_state.c
> index ab5c3ca..86e1624 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -3138,10 +3138,6 @@ static void si_init_config(struct si_context *sctx)
>         si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0);
>         si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
>
> -       si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, 0);
> -       si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, 0);
> -       si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, 0);
> -
>         si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
>         si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
>         si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
> diff --git a/src/gallium/drivers/radeonsi/si_state.h 
> b/src/gallium/drivers/radeonsi/si_state.h
> index 2522053..e4d859a 100644
> --- a/src/gallium/drivers/radeonsi/si_state.h
> +++ b/src/gallium/drivers/radeonsi/si_state.h
> @@ -148,7 +148,10 @@ struct si_shader_data {
>  #define SI_RING_TESS_FACTOR    0 /* for HS (TCS)  */
>  #define SI_RING_ESGS           0 /* for ES, GS */
>  #define SI_RING_GSVS           1 /* for GS, VS */
> -#define SI_NUM_RING_BUFFERS    2
> +#define SI_RING_GSVS_1         2 /* 1, 2, 3 for GS */
> +#define SI_RING_GSVS_2         3
> +#define SI_RING_GSVS_3         4
> +#define SI_NUM_RING_BUFFERS    5
>  #define SI_SO_BUF_OFFSET       SI_NUM_RING_BUFFERS
>  #define SI_NUM_RW_BUFFERS      (SI_SO_BUF_OFFSET + 4)
>
> @@ -249,7 +252,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint 
> shader, uint slot,
>                         struct pipe_resource *buffer,
>                         unsigned stride, unsigned num_records,
>                         bool add_tid, bool swizzle,
> -                       unsigned element_size, unsigned index_stride);
> +                       unsigned element_size, unsigned index_stride, 
> uint64_t offset);
>  void si_init_all_descriptors(struct si_context *sctx);
>  void si_release_all_descriptors(struct si_context *sctx);
>  void si_all_descriptors_begin_new_cs(struct si_context *sctx);
> diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
> b/src/gallium/drivers/radeonsi/si_state_shaders.c
> index 18bddfd..1a6854e 100644
> --- a/src/gallium/drivers/radeonsi/si_state_shaders.c
> +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
> @@ -206,16 +206,32 @@ static void si_shader_es(struct si_shader *shader)
>                 si_set_tesseval_regs(shader, pm4);
>  }
>
> +static unsigned si_gs_get_max_stream(struct si_shader *shader)
> +{
> +       struct pipe_stream_output_info *so = &shader->selector->so;
> +       unsigned max_stream = 0, i;
> +
> +       if (so->num_outputs == 0)
> +               return 0;
> +
> +       for (i = 0; i < so->num_outputs; i++) {
> +               if (so->output[i].stream > max_stream)
> +                       max_stream = so->output[i].stream;
> +       }
> +       return max_stream;
> +}
> +
>  static void si_shader_gs(struct si_shader *shader)
>  {
> -       unsigned gs_vert_itemsize = shader->selector->info.num_outputs * (16 
> >> 2);
> +       unsigned gs_vert_itemsize = shader->selector->info.num_outputs * 16;
>         unsigned gs_max_vert_out = shader->selector->gs_max_out_vertices;
> -       unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
> +       unsigned gsvs_itemsize = (gs_vert_itemsize * gs_max_vert_out) >> 2;
>         unsigned gs_num_invocations = shader->selector->gs_num_invocations;
>         unsigned cut_mode;
>         struct si_pm4_state *pm4;
>         unsigned num_sgprs, num_user_sgprs;
>         uint64_t va;
> +       unsigned max_stream = si_gs_get_max_stream(shader);
>
>         /* The GSVS_RING_ITEMSIZE register takes 15 bits */
>         assert(gsvs_itemsize < (1 << 15));
> @@ -243,16 +259,19 @@ static void si_shader_gs(struct si_shader *shader)
>                        S_028A40_GS_WRITE_OPTIMIZE(1));
>
>         si_pm4_set_reg(pm4, R_028A60_VGT_GSVS_RING_OFFSET_1, gsvs_itemsize);
> -       si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize);
> -       si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize);
> +       si_pm4_set_reg(pm4, R_028A64_VGT_GSVS_RING_OFFSET_2, gsvs_itemsize * 
> ((max_stream >= 2) ? 2 : 1));
> +       si_pm4_set_reg(pm4, R_028A68_VGT_GSVS_RING_OFFSET_3, gsvs_itemsize * 
> ((max_stream >= 3) ? 3 : 1));
>
>         si_pm4_set_reg(pm4, R_028AAC_VGT_ESGS_RING_ITEMSIZE,
>                        util_bitcount64(shader->selector->inputs_read) * (16 
> >> 2));
> -       si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize);
> +       si_pm4_set_reg(pm4, R_028AB0_VGT_GSVS_RING_ITEMSIZE, gsvs_itemsize * 
> (max_stream + 1));
>
>         si_pm4_set_reg(pm4, R_028B38_VGT_GS_MAX_VERT_OUT, gs_max_vert_out);
>
> -       si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize);
> +       si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize 
> >> 2);
> +       si_pm4_set_reg(pm4, R_028B60_VGT_GS_VERT_ITEMSIZE_1, (max_stream >= 
> 1) ? gs_vert_itemsize >> 2 : 0);
> +       si_pm4_set_reg(pm4, R_028B64_VGT_GS_VERT_ITEMSIZE_2, (max_stream >= 
> 2) ? gs_vert_itemsize >> 2 : 0);
> +       si_pm4_set_reg(pm4, R_028B68_VGT_GS_VERT_ITEMSIZE_3, (max_stream >= 
> 3) ? gs_vert_itemsize >> 2 : 0);
>
>         si_pm4_set_reg(pm4, R_028B90_VGT_GS_INSTANCE_CNT,
>                        S_028B90_CNT(MIN2(gs_num_invocations, 127)) |
> @@ -1001,15 +1020,42 @@ static void si_init_gs_rings(struct si_context *sctx)
>
>         si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_ESGS,
>                            sctx->esgs_ring, 0, esgs_ring_size,
> -                          true, true, 4, 64);
> +                          true, true, 4, 64, 0);
>         si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_ESGS,
>                            sctx->esgs_ring, 0, esgs_ring_size,
> -                          false, false, 0, 0);
> +                          false, false, 0, 0, 0);
>         si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_VERTEX, SI_RING_GSVS,
>                            sctx->gsvs_ring, 0, gsvs_ring_size,
> -                          false, false, 0, 0);
> +                          false, false, 0, 0, 0);
>  }
>
> +static void si_update_gs_rings(struct si_context *sctx)
> +{
> +       unsigned gs_vert_itemsize = sctx->gs_shader->info.num_outputs * 16;
> +       unsigned gs_max_vert_out = sctx->gs_shader->gs_max_out_vertices;
> +       unsigned gsvs_itemsize = gs_vert_itemsize * gs_max_vert_out;
> +       uint64_t offset;
> +
> +       si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
> +                          sctx->gsvs_ring, gsvs_itemsize,
> +                          64, true, true, 4, 16, 0);
> +
> +       offset = gsvs_itemsize * 64;
> +       si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_1,
> +                          sctx->gsvs_ring, gsvs_itemsize,
> +                          64, true, true, 4, 16, offset);
> +
> +       offset = (gsvs_itemsize * 2) * 64;
> +       si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_2,
> +                          sctx->gsvs_ring, gsvs_itemsize,
> +                          64, true, true, 4, 16, offset);
> +
> +       offset = (gsvs_itemsize * 3) * 64;
> +       si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_GEOMETRY, SI_RING_GSVS_3,
> +                          sctx->gsvs_ring, gsvs_itemsize,
> +                          64, true, true, 4, 16, offset);
> +
> +}
>  /**
>   * @returns 1 if \p sel has been updated to use a new scratch buffer and 0
>   *          otherwise.
> @@ -1171,7 +1217,7 @@ static void si_init_tess_factor_ring(struct si_context 
> *sctx)
>
>         si_set_ring_buffer(&sctx->b.b, PIPE_SHADER_TESS_CTRL,
>                            SI_RING_TESS_FACTOR, sctx->tf_ring, 0,
> -                          sctx->tf_ring->width0, false, false, 0, 0);
> +                          sctx->tf_ring->width0, false, false, 0, 0, 0);
>
>         sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
>  }
> @@ -1252,7 +1298,7 @@ static void si_update_so(struct si_context *sctx, 
> struct si_shader_selector *sha
>         int i;
>
>         for (i = 0; i < so->num_outputs; i++)
> -               enabled_stream_buffers_mask |= (1 << 
> so->output[i].output_buffer);
> +               enabled_stream_buffers_mask |= (1 << 
> so->output[i].output_buffer) << (so->output[i].stream * 4);
>         sctx->b.streamout.enabled_stream_buffers_mask = 
> enabled_stream_buffers_mask;
>         sctx->b.streamout.stride_in_dw = shader->so.stride;
>  }
> @@ -1311,15 +1357,12 @@ void si_update_shaders(struct si_context *sctx)
>
>                 if (!sctx->gs_rings)
>                         si_init_gs_rings(sctx);
> +
>                 if (sctx->emitted.named.gs_rings != sctx->gs_rings)
>                         sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
>                 si_pm4_bind_state(sctx, gs_rings, sctx->gs_rings);
>
> -               si_set_ring_buffer(ctx, PIPE_SHADER_GEOMETRY, SI_RING_GSVS,
> -                                  sctx->gsvs_ring,
> -                                  sctx->gs_shader->gs_max_out_vertices *
> -                                  sctx->gs_shader->info.num_outputs * 16,
> -                                  64, true, true, 4, 16);
> +               si_update_gs_rings(sctx);
>         } else {
>                 si_pm4_bind_state(sctx, gs_rings, NULL);
>                 si_pm4_bind_state(sctx, gs, NULL);
> --
> 2.4.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] radeonsi: add GS multiple streams support (v2)

Reply via email to