From: Marek Olšák <marek.ol...@amd.com> The OpenMAX state tracker will use this.
RadeonSI is adapted to use pipe_grid_info::last_block instead of its internal state. --- src/gallium/auxiliary/util/u_screen.c | 3 +++ src/gallium/docs/source/screen.rst | 2 ++ src/gallium/drivers/radeonsi/si_compute.c | 2 +- .../drivers/radeonsi/si_compute_blit.c | 18 +++++---------- src/gallium/drivers/radeonsi/si_get.c | 1 + src/gallium/drivers/radeonsi/si_pipe.h | 22 ------------------- src/gallium/include/pipe/p_defines.h | 1 + src/gallium/include/pipe/p_state.h | 21 ++++++++++++++++++ 8 files changed, 35 insertions(+), 35 deletions(-) diff --git a/src/gallium/auxiliary/util/u_screen.c b/src/gallium/auxiliary/util/u_screen.c index 50964f3b3ef..b902c083ad4 100644 --- a/src/gallium/auxiliary/util/u_screen.c +++ b/src/gallium/auxiliary/util/u_screen.c @@ -334,14 +334,17 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen, return 2047; case PIPE_CAP_SURFACE_SAMPLE_COUNT: return 0; case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL: return 1; case PIPE_CAP_MAX_VARYINGS: return 8; + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: + return 0; + default: unreachable("bad PIPE_CAP_*"); } } diff --git a/src/gallium/docs/source/screen.rst b/src/gallium/docs/source/screen.rst index 85ca5e1f5ce..60ba9bcbde0 100644 --- a/src/gallium/docs/source/screen.rst +++ b/src/gallium/docs/source/screen.rst @@ -485,20 +485,22 @@ The integer capabilities: * ``PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND``: True if the driver needs blend state to use zero/one instead of destination alpha for RGB/XRGB formats. * ``PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS``: True if the driver wants TESSINNER and TESSOUTER to be inputs (rather than system values) for tessellation evaluation shaders. * ``PIPE_CAP_DEST_SURFACE_SRGB_CONTROL``: Indicates whether the drivers supports switching the format between sRGB and linear for a surface that is used as destination in draw and blit calls. * ``PIPE_CAP_NIR_COMPACT_ARRAYS``: True if the compiler backend supports NIR's compact array feature, for all shader stages. * ``PIPE_CAP_MAX_VARYINGS``: The maximum number of fragment shader varyings. This will generally correspond to ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some cases may be a smaller number. +* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block + is implemented by the driver. See struct pipe_grid_info for more details. .. _pipe_capf: PIPE_CAPF_* ^^^^^^^^^^^^^^^^ The floating-point capabilities are: * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line. * ``PIPE_CAPF_MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line. diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 87addd53976..6c2269d903a 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -797,21 +797,21 @@ static void si_emit_dispatch_packets(struct si_context *sctx, radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS, compute_resource_limits); unsigned dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1) | S_00B800_FORCE_START_AT_000(1) | /* If the KMD allows it (there is a KMD hw register for it), * allow launching waves out-of-order. (same as Vulkan) */ S_00B800_ORDER_MODE(sctx->chip_class >= CIK); - uint *last_block = sctx->compute_last_block; + uint *last_block = info->last_block; bool partial_block_en = last_block[0] || last_block[1] || last_block[2]; radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); if (partial_block_en) { unsigned partial[3]; /* If no partial_block, these should be an entire block size, not 0. */ partial[0] = last_block[0] ? last_block[0] : info->block[0]; partial[1] = last_block[1] ? last_block[1] : info->block[1]; diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c b/src/gallium/drivers/radeonsi/si_compute_blit.c index f5e9c02dd10..a7453099ac6 100644 --- a/src/gallium/drivers/radeonsi/si_compute_blit.c +++ b/src/gallium/drivers/radeonsi/si_compute_blit.c @@ -374,45 +374,42 @@ void si_compute_copy_image(struct si_context *sctx, ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image); struct pipe_grid_info info = {0}; if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == PIPE_TEXTURE_1D_ARRAY) { if (!sctx->cs_copy_image_1d_array) sctx->cs_copy_image_1d_array = si_create_copy_image_compute_shader_1d_array(ctx); ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array); info.block[0] = 64; - sctx->compute_last_block[0] = width % 64; + info.last_block[0] = width % 64; info.block[1] = 1; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 64); info.grid[1] = depth; info.grid[2] = 1; } else { if (!sctx->cs_copy_image) sctx->cs_copy_image = si_create_copy_image_compute_shader(ctx); ctx->bind_compute_state(ctx, sctx->cs_copy_image); info.block[0] = 8; - sctx->compute_last_block[0] = width % 8; + info.last_block[0] = width % 8; info.block[1] = 8; - sctx->compute_last_block[1] = height % 8; + info.last_block[1] = height % 8; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 8); info.grid[1] = DIV_ROUND_UP(height, 8); info.grid[2] = depth; } ctx->launch_grid(ctx, &info); - sctx->compute_last_block[0] = 0; - sctx->compute_last_block[1] = 0; - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image); ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); si_compute_internal_end(sctx); } void si_init_compute_blit_functions(struct si_context *sctx) @@ -476,44 +473,41 @@ void si_compute_clear_render_target(struct pipe_context *ctx, ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image); struct pipe_grid_info info = {0}; if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) { if (!sctx->cs_clear_render_target) sctx->cs_clear_render_target = si_clear_render_target_shader(ctx); ctx->bind_compute_state(ctx, sctx->cs_clear_render_target); info.block[0] = 8; - sctx->compute_last_block[0] = width % 8; + info.last_block[0] = width % 8; info.block[1] = 8; - sctx->compute_last_block[1] = height % 8; + info.last_block[1] = height % 8; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 8); info.grid[1] = DIV_ROUND_UP(height, 8); info.grid[2] = num_layers; } else { if (!sctx->cs_clear_render_target_1d_array) sctx->cs_clear_render_target_1d_array = si_clear_render_target_shader_1d_array(ctx); ctx->bind_compute_state(ctx, sctx->cs_clear_render_target_1d_array); info.block[0] = 64; - sctx->compute_last_block[0] = width % 64; + info.last_block[0] = width % 64; info.block[1] = 1; info.block[2] = 1; info.grid[0] = DIV_ROUND_UP(width, 64); info.grid[1] = num_layers; info.grid[2] = 1; } ctx->launch_grid(ctx, &info); - sctx->compute_last_block[0] = 0; - sctx->compute_last_block[1] = 0; - sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 : 0) | si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM); ctx->bind_compute_state(ctx, saved_cs); ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image); ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb); si_compute_internal_end(sctx); } diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c index a5cb209b59e..6fa67087c7d 100644 --- a/src/gallium/drivers/radeonsi/si_get.c +++ b/src/gallium/drivers/radeonsi/si_get.c @@ -153,20 +153,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_INT64: case PIPE_CAP_INT64_DIVMOD: case PIPE_CAP_TGSI_CLOCK: case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX: case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION: case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS: case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET: case PIPE_CAP_TGSI_BALLOT: case PIPE_CAP_TGSI_VOTE: case PIPE_CAP_TGSI_FS_FBFETCH: + case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK: return 1; case PIPE_CAP_RESOURCE_FROM_USER_MEMORY: return !SI_BIG_ENDIAN && sscreen->info.has_userptr; case PIPE_CAP_DEVICE_RESET_STATUS_QUERY: return sscreen->info.has_gpu_reset_status_query || sscreen->info.has_gpu_reset_counter_query; case PIPE_CAP_TEXTURE_MULTISAMPLE: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index b3198d45ea6..b6858b46ec0 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -914,42 +914,20 @@ struct si_context { struct pipe_resource *gsvs_ring; struct pipe_resource *tess_rings; union pipe_color_union *border_color_table; /* in CPU memory, any endian */ struct si_resource *border_color_buffer; union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */ unsigned border_color_count; unsigned num_vs_blit_sgprs; uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD]; uint32_t cs_user_data[4]; - /** - * last_block allows disabling threads at the farthermost grid boundary. - * Full blocks as specified by "block" are launched, but the threads - * outside of "last_block" dimensions are disabled. - * - * If a block touches the grid boundary in the i-th axis, threads with - * THREAD_ID[i] >= last_block[i] are disabled. - * - * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], - * meaning no effect. - * - * It's equivalent to doing this at the beginning of the compute shader: - * - * for (i = 0; i < 3; i++) { - * if (block_id[i] == grid[i] - 1 && - * last_block[i] && last_block[i] >= thread_id[i]) - * return; - * } - * (this could be moved into pipe_grid_info) - */ - uint compute_last_block[3]; - /* Vertex and index buffers. */ bool vertex_buffers_dirty; bool vertex_buffer_pointer_dirty; struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS]; /* MSAA config state. */ int ps_iter_samples; bool ps_uses_fbfetch; bool smoothing_enabled; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index e2b0104ce43..d4732dc257f 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -851,20 +851,21 @@ enum pipe_cap PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS, PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET, PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET, PIPE_CAP_SURFACE_SAMPLE_COUNT, PIPE_CAP_TGSI_ATOMFADD, PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE, PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND, PIPE_CAP_DEST_SURFACE_SRGB_CONTROL, PIPE_CAP_NIR_COMPACT_ARRAYS, PIPE_CAP_MAX_VARYINGS, + PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK, }; /** * Possible bits for PIPE_CAP_CONTEXT_PRIORITY_MASK param, which should * return a bitmask of the supported priorities. If the driver does not * support prioritized contexts, it can return 0. * * Note that these match __DRI2_RENDER_HAS_CONTEXT_PRIORITY_* */ #define PIPE_CONTEXT_PRIORITY_LOW (1 << 0) diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 38052e5fd3d..3a91ddd71b5 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -831,20 +831,41 @@ struct pipe_grid_info * clEnqueueNDRangeKernel. Note block[] and grid[] must be padded with * 1 for non-used dimensions. */ uint work_dim; /** * Determine the layout of the working block (in thread units) to be used. */ uint block[3]; + /** + * last_block allows disabling threads at the farthermost grid boundary. + * Full blocks as specified by "block" are launched, but the threads + * outside of "last_block" dimensions are disabled. + * + * If a block touches the grid boundary in the i-th axis, threads with + * THREAD_ID[i] >= last_block[i] are disabled. + * + * If last_block[i] is 0, it has the same behavior as last_block[i] = block[i], + * meaning no effect. + * + * It's equivalent to doing this at the beginning of the compute shader: + * + * for (i = 0; i < 3; i++) { + * if (block_id[i] == grid[i] - 1 && + * last_block[i] && thread_id[i] >= last_block[i]) + * return; + * } + */ + uint last_block[3]; + /** * Determine the layout of the grid (in block units) to be used. */ uint grid[3]; /* Indirect compute parameters resource: If not NULL, block sizes are taken * from this buffer instead, which is laid out as follows: * * struct { * uint32_t num_blocks_x; -- 2.17.1 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev