From: Marek Olšák <marek.ol...@amd.com>

The OpenMAX state tracker will use this.

RadeonSI is adapted to use pipe_grid_info::last_block instead of its
internal state.
---
 src/gallium/auxiliary/util/u_screen.c         |  3 +++
 src/gallium/docs/source/screen.rst            |  2 ++
 src/gallium/drivers/radeonsi/si_compute.c     |  2 +-
 .../drivers/radeonsi/si_compute_blit.c        | 18 +++++----------
 src/gallium/drivers/radeonsi/si_get.c         |  1 +
 src/gallium/drivers/radeonsi/si_pipe.h        | 22 -------------------
 src/gallium/include/pipe/p_defines.h          |  1 +
 src/gallium/include/pipe/p_state.h            | 21 ++++++++++++++++++
 8 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_screen.c 
b/src/gallium/auxiliary/util/u_screen.c
index 50964f3b3ef..b902c083ad4 100644
--- a/src/gallium/auxiliary/util/u_screen.c
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -334,14 +334,17 @@ u_pipe_screen_get_param_defaults(struct pipe_screen 
*pscreen,
       return 2047;
 
    case PIPE_CAP_SURFACE_SAMPLE_COUNT:
       return 0;
    case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:
       return 1;
 
    case PIPE_CAP_MAX_VARYINGS:
       return 8;
 
+   case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
+      return 0;
+
    default:
       unreachable("bad PIPE_CAP_*");
    }
 }
diff --git a/src/gallium/docs/source/screen.rst 
b/src/gallium/docs/source/screen.rst
index 85ca5e1f5ce..60ba9bcbde0 100644
--- a/src/gallium/docs/source/screen.rst
+++ b/src/gallium/docs/source/screen.rst
@@ -485,20 +485,22 @@ The integer capabilities:
 * ``PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND``: True if the driver needs blend 
state to use zero/one instead of destination alpha for RGB/XRGB formats.
 * ``PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS``: True if the driver wants TESSINNER 
and TESSOUTER to be inputs (rather than system values) for tessellation 
evaluation shaders.
 * ``PIPE_CAP_DEST_SURFACE_SRGB_CONTROL``: Indicates whether the drivers
   supports switching the format between sRGB and linear for a surface that is
   used as destination in draw and blit calls.
 * ``PIPE_CAP_NIR_COMPACT_ARRAYS``: True if the compiler backend supports NIR's 
compact array feature, for all shader stages.
 * ``PIPE_CAP_MAX_VARYINGS``: The maximum number of fragment shader
   varyings. This will generally correspond to
   ``PIPE_SHADER_CAP_MAX_INPUTS`` for the fragment shader, but in some
   cases may be a smaller number.
+* ``PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK``: Whether pipe_grid_info::last_block
+  is implemented by the driver. See struct pipe_grid_info for more details.
 
 .. _pipe_capf:
 
 PIPE_CAPF_*
 ^^^^^^^^^^^^^^^^
 
 The floating-point capabilities are:
 
 * ``PIPE_CAPF_MAX_LINE_WIDTH``: The maximum width of a regular line.
 * ``PIPE_CAPF_MAX_LINE_WIDTH_AA``: The maximum width of a smoothed line.
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 87addd53976..6c2269d903a 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -797,21 +797,21 @@ static void si_emit_dispatch_packets(struct si_context 
*sctx,
        radeon_set_sh_reg(cs, R_00B854_COMPUTE_RESOURCE_LIMITS,
                          compute_resource_limits);
 
        unsigned dispatch_initiator =
                S_00B800_COMPUTE_SHADER_EN(1) |
                S_00B800_FORCE_START_AT_000(1) |
                /* If the KMD allows it (there is a KMD hw register for it),
                 * allow launching waves out-of-order. (same as Vulkan) */
                S_00B800_ORDER_MODE(sctx->chip_class >= CIK);
 
-       uint *last_block = sctx->compute_last_block;
+       uint *last_block = info->last_block;
        bool partial_block_en = last_block[0] || last_block[1] || last_block[2];
 
        radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
 
        if (partial_block_en) {
                unsigned partial[3];
 
                /* If no partial_block, these should be an entire block size, 
not 0. */
                partial[0] = last_block[0] ? last_block[0] : info->block[0];
                partial[1] = last_block[1] ? last_block[1] : info->block[1];
diff --git a/src/gallium/drivers/radeonsi/si_compute_blit.c 
b/src/gallium/drivers/radeonsi/si_compute_blit.c
index f5e9c02dd10..a7453099ac6 100644
--- a/src/gallium/drivers/radeonsi/si_compute_blit.c
+++ b/src/gallium/drivers/radeonsi/si_compute_blit.c
@@ -374,45 +374,42 @@ void si_compute_copy_image(struct si_context *sctx,
        ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, image);
 
        struct pipe_grid_info info = {0};
 
        if (dst->target == PIPE_TEXTURE_1D_ARRAY && src->target == 
PIPE_TEXTURE_1D_ARRAY) {
                if (!sctx->cs_copy_image_1d_array)
                        sctx->cs_copy_image_1d_array =
                                
si_create_copy_image_compute_shader_1d_array(ctx);
                ctx->bind_compute_state(ctx, sctx->cs_copy_image_1d_array);
                info.block[0] = 64;
-               sctx->compute_last_block[0] = width % 64;
+               info.last_block[0] = width % 64;
                info.block[1] = 1;
                info.block[2] = 1;
                info.grid[0] = DIV_ROUND_UP(width, 64);
                info.grid[1] = depth;
                info.grid[2] = 1;
        } else {
                if (!sctx->cs_copy_image)
                        sctx->cs_copy_image = 
si_create_copy_image_compute_shader(ctx);
                ctx->bind_compute_state(ctx, sctx->cs_copy_image);
                info.block[0] = 8;
-               sctx->compute_last_block[0] = width % 8;
+               info.last_block[0] = width % 8;
                info.block[1] = 8;
-               sctx->compute_last_block[1] = height % 8;
+               info.last_block[1] = height % 8;
                info.block[2] = 1;
                info.grid[0] = DIV_ROUND_UP(width, 8);
                info.grid[1] = DIV_ROUND_UP(height, 8);
                info.grid[2] = depth;
        }
 
        ctx->launch_grid(ctx, &info);
 
-       sctx->compute_last_block[0] = 0;
-       sctx->compute_last_block[1] = 0;
-
        sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
                       (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 
: 0) |
                       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
        ctx->bind_compute_state(ctx, saved_cs);
        ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 2, saved_image);
        ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
        si_compute_internal_end(sctx);
 }
 
 void si_init_compute_blit_functions(struct si_context *sctx)
@@ -476,44 +473,41 @@ void si_compute_clear_render_target(struct pipe_context 
*ctx,
 
        ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &image);
 
        struct pipe_grid_info info = {0};
 
        if (dstsurf->texture->target != PIPE_TEXTURE_1D_ARRAY) {
                if (!sctx->cs_clear_render_target)
                        sctx->cs_clear_render_target = 
si_clear_render_target_shader(ctx);
                ctx->bind_compute_state(ctx, sctx->cs_clear_render_target);
                info.block[0] = 8;
-               sctx->compute_last_block[0] = width % 8;
+               info.last_block[0] = width % 8;
                info.block[1] = 8;
-               sctx->compute_last_block[1] = height % 8;
+               info.last_block[1] = height % 8;
                info.block[2] = 1;
                info.grid[0] = DIV_ROUND_UP(width, 8);
                info.grid[1] = DIV_ROUND_UP(height, 8);
                info.grid[2] = num_layers;
        } else {
                if (!sctx->cs_clear_render_target_1d_array)
                        sctx->cs_clear_render_target_1d_array =
                                si_clear_render_target_shader_1d_array(ctx);
                ctx->bind_compute_state(ctx, 
sctx->cs_clear_render_target_1d_array);
                info.block[0] = 64;
-               sctx->compute_last_block[0] = width % 64;
+               info.last_block[0] = width % 64;
                info.block[1] = 1;
                info.block[2] = 1;
                info.grid[0] = DIV_ROUND_UP(width, 64);
                info.grid[1] = num_layers;
                info.grid[2] = 1;
        }
 
        ctx->launch_grid(ctx, &info);
 
-       sctx->compute_last_block[0] = 0;
-       sctx->compute_last_block[1] = 0;
-
        sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH |
                       (sctx->chip_class <= VI ? SI_CONTEXT_WRITEBACK_GLOBAL_L2 
: 0) |
                       si_get_flush_flags(sctx, SI_COHERENCY_SHADER, L2_STREAM);
        ctx->bind_compute_state(ctx, saved_cs);
        ctx->set_shader_images(ctx, PIPE_SHADER_COMPUTE, 0, 1, &saved_image);
        ctx->set_constant_buffer(ctx, PIPE_SHADER_COMPUTE, 0, &saved_cb);
        si_compute_internal_end(sctx);
 }
diff --git a/src/gallium/drivers/radeonsi/si_get.c 
b/src/gallium/drivers/radeonsi/si_get.c
index a5cb209b59e..6fa67087c7d 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -153,20 +153,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
        case PIPE_CAP_INT64:
        case PIPE_CAP_INT64_DIVMOD:
        case PIPE_CAP_TGSI_CLOCK:
        case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
        case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
        case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
        case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
        case PIPE_CAP_TGSI_BALLOT:
        case PIPE_CAP_TGSI_VOTE:
        case PIPE_CAP_TGSI_FS_FBFETCH:
+       case PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK:
                return 1;
 
        case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
                return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
 
        case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:
                return sscreen->info.has_gpu_reset_status_query ||
                       sscreen->info.has_gpu_reset_counter_query;
 
        case PIPE_CAP_TEXTURE_MULTISAMPLE:
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h 
b/src/gallium/drivers/radeonsi/si_pipe.h
index b3198d45ea6..b6858b46ec0 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -914,42 +914,20 @@ struct si_context {
        struct pipe_resource            *gsvs_ring;
        struct pipe_resource            *tess_rings;
        union pipe_color_union          *border_color_table; /* in CPU memory, 
any endian */
        struct si_resource              *border_color_buffer;
        union pipe_color_union          *border_color_map; /* in VRAM (slow 
access), little endian */
        unsigned                        border_color_count;
        unsigned                        num_vs_blit_sgprs;
        uint32_t                        
vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
        uint32_t                        cs_user_data[4];
 
-        /**
-         * last_block allows disabling threads at the farthermost grid 
boundary.
-         * Full blocks as specified by "block" are launched, but the threads
-         * outside of "last_block" dimensions are disabled.
-         *
-         * If a block touches the grid boundary in the i-th axis, threads with
-         * THREAD_ID[i] >= last_block[i] are disabled.
-         *
-         * If last_block[i] is 0, it has the same behavior as last_block[i] = 
block[i],
-         * meaning no effect.
-         *
-         * It's equivalent to doing this at the beginning of the compute 
shader:
-         *
-         *   for (i = 0; i < 3; i++) {
-         *      if (block_id[i] == grid[i] - 1 &&
-         *          last_block[i] && last_block[i] >= thread_id[i])
-         *         return;
-         *   }
-         * (this could be moved into pipe_grid_info)
-         */
-        uint compute_last_block[3];
-
        /* Vertex and index buffers. */
        bool                            vertex_buffers_dirty;
        bool                            vertex_buffer_pointer_dirty;
        struct pipe_vertex_buffer       vertex_buffer[SI_NUM_VERTEX_BUFFERS];
 
        /* MSAA config state. */
        int                             ps_iter_samples;
        bool                            ps_uses_fbfetch;
        bool                            smoothing_enabled;
 
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index e2b0104ce43..d4732dc257f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -851,20 +851,21 @@ enum pipe_cap
    PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS,
    PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET,
    PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET,
    PIPE_CAP_SURFACE_SAMPLE_COUNT,
    PIPE_CAP_TGSI_ATOMFADD,
    PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE,
    PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND,
    PIPE_CAP_DEST_SURFACE_SRGB_CONTROL,
    PIPE_CAP_NIR_COMPACT_ARRAYS,
    PIPE_CAP_MAX_VARYINGS,
+   PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK,
 };
 
 /**
  * Possible bits for PIPE_CAP_CONTEXT_PRIORITY_MASK param, which should
  * return a bitmask of the supported priorities.  If the driver does not
  * support prioritized contexts, it can return 0.
  *
  * Note that these match __DRI2_RENDER_HAS_CONTEXT_PRIORITY_*
  */
 #define PIPE_CONTEXT_PRIORITY_LOW     (1 << 0)
diff --git a/src/gallium/include/pipe/p_state.h 
b/src/gallium/include/pipe/p_state.h
index 38052e5fd3d..3a91ddd71b5 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -831,20 +831,41 @@ struct pipe_grid_info
     * clEnqueueNDRangeKernel. Note block[] and grid[] must be padded with
     * 1 for non-used dimensions.
     */
    uint work_dim;
 
    /**
     * Determine the layout of the working block (in thread units) to be used.
     */
    uint block[3];
 
+   /**
+    * last_block allows disabling threads at the farthermost grid boundary.
+    * Full blocks as specified by "block" are launched, but the threads
+    * outside of "last_block" dimensions are disabled.
+    *
+    * If a block touches the grid boundary in the i-th axis, threads with
+    * THREAD_ID[i] >= last_block[i] are disabled.
+    *
+    * If last_block[i] is 0, it has the same behavior as last_block[i] = 
block[i],
+    * meaning no effect.
+    *
+    * It's equivalent to doing this at the beginning of the compute shader:
+    *
+    *   for (i = 0; i < 3; i++) {
+    *      if (block_id[i] == grid[i] - 1 &&
+    *          last_block[i] && thread_id[i] >= last_block[i])
+    *         return;
+    *   }
+    */
+   uint last_block[3];
+
    /**
     * Determine the layout of the grid (in block units) to be used.
     */
    uint grid[3];
 
    /* Indirect compute parameters resource: If not NULL, block sizes are taken
     * from this buffer instead, which is laid out as follows:
     *
     *  struct {
     *     uint32_t num_blocks_x;
-- 
2.17.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to