Why does this (the new `partial_block` field) need to be in p_state.h? And who is responsible for setting it (and how will it be set)?
On Tue, Jan 8, 2019 at 2:47 PM Jiang, Sonny <sonny.ji...@amd.com> wrote: > > and add radeonsi support. This will be used by radeonsi internally. > > Signed-off-by: Sonny Jiang <sonny.ji...@amd.com> > --- > src/gallium/drivers/radeonsi/si_compute.c | 33 +++++++++++++++++++---- > src/gallium/include/pipe/p_state.h | 7 +++++ > 2 files changed, 35 insertions(+), 5 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/si_compute.c > b/src/gallium/drivers/radeonsi/si_compute.c > index cbcd8e79c7b..69ffad45cd9 100644 > --- a/src/gallium/drivers/radeonsi/si_compute.c > +++ b/src/gallium/drivers/radeonsi/si_compute.c > @@ -797,11 +797,6 @@ static void si_emit_dispatch_packets(struct si_context > *sctx, > radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, > compute_resource_limits); > > - radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); > - radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); > - radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); > - radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); > - > unsigned dispatch_initiator = > S_00B800_COMPUTE_SHADER_EN(1) | > S_00B800_FORCE_START_AT_000(1) | > @@ -809,6 +804,34 @@ static void si_emit_dispatch_packets(struct si_context > *sctx, > * allow launching waves out-of-order. (same as Vulkan) */ > S_00B800_ORDER_MODE(sctx->chip_class >= CIK); > > + bool partial_block_en = info->partial_block[0] || > + info->partial_block[1] || > + info->partial_block[2]; > + > + radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); > + > + if (partial_block_en) { > + unsigned partial[3]; > + > + /* If no partial_block, these should be an entire block size, > not 0. */ > + partial[0] = info->partial_block[0] ? info->partial_block[0] > : info->block[0]; > + partial[1] = info->partial_block[1] ? info->partial_block[1] > : info->block[1]; > + partial[2] = info->partial_block[2] ? 
info->partial_block[2] > : info->block[2]; > + > + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0]) | > + S_00B81C_NUM_THREAD_PARTIAL(partial[0])); > + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1]) | > + S_00B820_NUM_THREAD_PARTIAL(partial[1])); > + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2]) | > + S_00B824_NUM_THREAD_PARTIAL(partial[2])); > + > + dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); > + } else { > + radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(info->block[0])); > + radeon_emit(cs, S_00B820_NUM_THREAD_FULL(info->block[1])); > + radeon_emit(cs, S_00B824_NUM_THREAD_FULL(info->block[2])); > + } > + > if (info->indirect) { > uint64_t base_va = r600_resource(info->indirect)->gpu_address; > > diff --git a/src/gallium/include/pipe/p_state.h > b/src/gallium/include/pipe/p_state.h > index 38052e5fd3d..56f5bdd4c85 100644 > --- a/src/gallium/include/pipe/p_state.h > +++ b/src/gallium/include/pipe/p_state.h > @@ -838,6 +838,13 @@ struct pipe_grid_info > */ > uint block[3]; > > + /** > + * Number of threads to add to the grid in X, Y, and Z directions for > + * compute dispatches that are not aligned to the block size. > + * The added threads will be launched as partial thread blocks. > + */ > + uint partial_block[3]; > + > /** > * Determine the layout of the grid (in block units) to be used. > */ > -- > 2.17.1 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev