This artificially converts a buffer into an 8K x N (a3xx) or 16K x N (a4xx)
2D texture to fetch texels from. As a result we can access up to 8K x 8K
texels on a3xx, and 16K x 16K on a4xx. This could be further expanded into
3D space if necessary, but 64M texels should be enough.

We have to check out-of-bounds conditions in the shader since otherwise
we wouldn't be able to prevent a situation where the last line of the
texture covers unallocated pages.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

The limits we were previously allowing are too small. The spec requires at
least 64K texels.

 src/gallium/drivers/freedreno/a3xx/fd3_texture.c   | 15 +++++---
 src/gallium/drivers/freedreno/a4xx/fd4_texture.c   | 13 +++----
 src/gallium/drivers/freedreno/freedreno_screen.c   |  7 ++--
 .../drivers/freedreno/ir3/ir3_compiler_nir.c       | 40 ++++++++++++++++++++--
 src/gallium/drivers/freedreno/ir3/ir3_shader.c     | 32 +++++++++++++++++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h     |  7 +++-
 6 files changed, 94 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index 94caaed..875bd49 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -194,10 +194,10 @@ tex_type(unsigned target)
        switch (target) {
        default:
                assert(0);
-       case PIPE_BUFFER:
        case PIPE_TEXTURE_1D:
        case PIPE_TEXTURE_1D_ARRAY:
                return A3XX_TEX_1D;
+       case PIPE_BUFFER:
        case PIPE_TEXTURE_RECT:
        case PIPE_TEXTURE_2D:
        case PIPE_TEXTURE_2D_ARRAY:
@@ -238,11 +238,16 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct 
pipe_resource *prsc,
                so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
 
        if (prsc->target == PIPE_BUFFER) {
+               unsigned elements =
+                       cso->u.buf.size / 
util_format_get_blocksize(cso->format);
                lvl = 0;
                so->texconst1 =
                        
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
-                       A3XX_TEX_CONST_1_WIDTH(cso->u.buf.size / 
util_format_get_blocksize(cso->format)) |
-                       A3XX_TEX_CONST_1_HEIGHT(1);
+                       A3XX_TEX_CONST_1_WIDTH(MIN2(elements, 8192)) |
+                       A3XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 8192));
+               so->texconst2 =
+                       A3XX_TEX_CONST_2_PITCH(MIN2(elements, 8192) *
+                                                                  
util_format_get_blocksize(cso->format));
        } else {
                unsigned miplevels;
 
@@ -254,10 +259,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct 
pipe_resource *prsc,
                        
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
                        A3XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
                        A3XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+               so->texconst2 =
+                       A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, 
rsc->slices[lvl].pitch) * rsc->cpp);
        }
        /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
-       so->texconst2 =
-                       A3XX_TEX_CONST_2_PITCH(fd3_pipe2nblocksx(cso->format, 
rsc->slices[lvl].pitch) * rsc->cpp);
        switch (prsc->target) {
        case PIPE_TEXTURE_1D_ARRAY:
        case PIPE_TEXTURE_2D_ARRAY:
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c 
b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
index 4faecee..06645ca 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -195,10 +195,10 @@ tex_type(unsigned target)
        switch (target) {
        default:
                assert(0);
-       case PIPE_BUFFER:
        case PIPE_TEXTURE_1D:
        case PIPE_TEXTURE_1D_ARRAY:
                return A4XX_TEX_1D;
+       case PIPE_BUFFER:
        case PIPE_TEXTURE_RECT:
        case PIPE_TEXTURE_2D:
        case PIPE_TEXTURE_2D_ARRAY:
@@ -249,15 +249,16 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct 
pipe_resource *prsc,
        }
 
        if (cso->target == PIPE_BUFFER) {
-               unsigned elements = cso->u.buf.size / 
util_format_get_blocksize(cso->format);
-
+               unsigned elements =
+                       cso->u.buf.size / 
util_format_get_blocksize(cso->format);
                lvl = 0;
                so->texconst1 =
-                       A4XX_TEX_CONST_1_WIDTH(elements) |
-                       A4XX_TEX_CONST_1_HEIGHT(1);
+                       A4XX_TEX_CONST_1_WIDTH(MIN2(elements, 16384)) |
+                       A4XX_TEX_CONST_1_HEIGHT(DIV_ROUND_UP(elements, 16384));
                so->texconst2 =
                        
A4XX_TEX_CONST_2_FETCHSIZE(fd4_pipe2fetchsize(cso->format)) |
-                       A4XX_TEX_CONST_2_PITCH(elements * rsc->cpp);
+                       A4XX_TEX_CONST_2_PITCH(MIN2(elements, 16384) *
+                                                                  
util_format_get_blocksize(cso->format));
                so->offset = cso->u.buf.offset;
        } else {
                unsigned miplevels;
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index fbdd1e2..c67e2c0 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -218,11 +218,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum 
pipe_cap param)
                if (is_a4xx(screen)) return 32;
                return 0;
        case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
-               /* We could possibly emulate more by pretending 2d/rect 
textures and
-                * splitting high bits of index into 2nd dimension..
-                */
-               if (is_a3xx(screen)) return 8192;
-               if (is_a4xx(screen)) return 16384;
+               if (is_a3xx(screen)) return 8192 * 8192;
+               if (is_a4xx(screen)) return 16384 * 16384;
                return 0;
 
        case PIPE_CAP_TEXTURE_FLOAT_LINEAR:
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c 
b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index b1b9d6b..4a5a5f6 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -113,6 +113,9 @@ struct ir3_compile {
 
        unsigned max_texture_index;
 
+       /* a3xx and a4xx have different max texture sizes */
+       uint8_t max_texture_size_log2;
+
        /* set if we encounter something we can't handle yet, so we
         * can bail cleanly and fallback to TGSI compiler f/e
         */
@@ -136,6 +139,7 @@ compile_init(struct ir3_compiler *compiler,
                ctx->levels_add_one = false;
                ctx->unminify_coords = false;
                ctx->array_index_add_half = true;
+               ctx->max_texture_size_log2 = 14;
 
                if (so->type == SHADER_VERTEX)
                        ctx->astc_srgb = so->key.vastc_srgb;
@@ -148,6 +152,7 @@ compile_init(struct ir3_compiler *compiler,
                ctx->levels_add_one = true;
                ctx->unminify_coords = true;
                ctx->array_index_add_half = false;
+               ctx->max_texture_size_log2 = 13;
        }
 
        ctx->compiler = compiler;
@@ -187,6 +192,7 @@ compile_init(struct ir3_compiler *compiler,
         *
         *    num_uniform * vec4  -  user consts
         *    4 * vec4            -  UBO addresses
+        *    4 * vec4            -  TBO lengths
         *    if (vertex shader) {
         *        N * vec4        -  driver params (IR3_DP_*)
         *        1 * vec4        -  stream-out addresses
@@ -199,6 +205,9 @@ compile_init(struct ir3_compiler *compiler,
        /* reserve 4 (vec4) slots for ubo base addresses: */
        so->first_immediate += 4;
 
+       /* reserve 4 (vec4) slots for tbo lengths: */
+       so->first_immediate += 4;
+
        if (so->type == SHADER_VERTEX) {
                /* driver params (see ir3_driver_param): */
                so->first_immediate += IR3_DP_COUNT/4;  /* convert to vec4 */
@@ -1340,6 +1349,14 @@ tex_info(nir_tex_instr *tex, unsigned *flagsp, unsigned 
*coordsp)
        *coordsp = coords;
 }
 
+/* fetch the number of elements in the TBO: */
+static struct ir3_instruction *
+tex_tbo_length(struct ir3_compile *ctx, unsigned tex_idx)
+{
+       unsigned tbo = regid(ctx->so->first_driver_param + IR3_TBOS_OFF, 0);
+       return create_uniform(ctx, tbo + tex_idx);
+}
+
 static void
 emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
 {
@@ -1414,6 +1431,8 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
                return;
        }
 
+       unsigned tex_idx = tex->texture_index;
+
        tex_info(tex, &flags, &coords);
 
        /*
@@ -1440,7 +1459,24 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
                        src0[i] = ir3_SHL_B(b, src0[i], 0, lod, 0);
        }
 
-       if (coords == 1) {
+       if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
+               /* need to clamp the coordinate to the number of elements 
manually */
+               struct ir3_instruction *elements = tex_tbo_length(ctx, tex_idx);
+               struct ir3_instruction *cond =
+                       ir3_CMPS_U(ctx->block, coord[0], 0, elements, 0);
+               cond->cat2.condition = IR3_COND_LT;
+
+               src0[0] = ir3_AND_B(b, coord[0], 0,
+                                                       create_immed(b, (1 << 
ctx->max_texture_size_log2) - 1), 0);
+               src0[nsrc0++] = ir3_SHR_B(b, coord[0], 0,
+                                                                 
create_immed(b, ctx->max_texture_size_log2), 0);
+
+               /* If the coordinate is out of range, set it to -1 */
+               src0[0] = ir3_SEL_B32(b, src0[0], 0, cond, 0, create_immed(b, 
~0U), 0);
+               src0[1] = ir3_SEL_B32(b, src0[1], 0, cond, 0, create_immed(b, 
~0U), 0);
+
+               ctx->so->has_tbo = true;
+       } else if (coords == 1) {
                /* hw doesn't do 1d, so we treat it as 2d with
                 * height of 1, and patch up the y coord.
                 * TODO: y coord should be (int)0 in some cases..
@@ -1518,8 +1554,6 @@ emit_tex(struct ir3_compile *ctx, nir_tex_instr *tex)
        if (opc == OPC_GETLOD)
                type = TYPE_U32;
 
-       unsigned tex_idx = tex->texture_index;
-
        ctx->max_texture_index = MAX2(ctx->max_texture_index, tex_idx);
 
        struct ir3_instruction *col0 = create_collect(b, src0, nsrc0);
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index 76460d9..f654bef 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -552,6 +552,33 @@ emit_ubos(struct fd_context *ctx, const struct 
ir3_shader_variant *v,
        }
 }
 
+/* emit tbo element lengths: */
+static void
+emit_tbos(struct fd_context *ctx, const struct ir3_shader_variant *v,
+               struct fd_ringbuffer *ring, struct fd_texture_stateobj 
*textures)
+{
+       uint32_t offset = v->first_driver_param + IR3_TBOS_OFF;
+       if (v->constlen > offset) {
+               uint32_t params = MIN2(4, v->constlen - offset) * 4;
+               uint32_t lengths[params];
+
+               for (uint32_t i = 0; i < params; i++) {
+                       struct pipe_sampler_view *tex = textures->textures[i];
+
+                       if (tex && tex->texture && tex->target == PIPE_BUFFER) {
+                               lengths[i] =
+                                       tex->u.buf.size / 
util_format_get_blocksize(tex->format);
+                       } else {
+                               lengths[i] = 0;
+                       }
+               }
+
+               fd_wfi(ctx->batch, ring);
+               ctx->emit_const(ring, v->type, offset * 4, 0,
+                                               params, lengths, NULL);
+       }
+}
+
 static void
 emit_immediates(struct fd_context *ctx, const struct ir3_shader_variant *v,
                struct fd_ringbuffer *ring)
@@ -658,13 +685,16 @@ ir3_emit_consts(const struct ir3_shader_variant *v, 
struct fd_ringbuffer *ring,
 {
        if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
                struct fd_constbuf_stateobj *constbuf;
+               struct fd_texture_stateobj *textures;
                bool shader_dirty;
 
                if (v->type == SHADER_VERTEX) {
                        constbuf = &ctx->constbuf[PIPE_SHADER_VERTEX];
+                       textures = &ctx->verttex;
                        shader_dirty = !!(dirty & FD_SHADER_DIRTY_VP);
                } else if (v->type == SHADER_FRAGMENT) {
                        constbuf = &ctx->constbuf[PIPE_SHADER_FRAGMENT];
+                       textures = &ctx->fragtex;
                        shader_dirty = !!(dirty & FD_SHADER_DIRTY_FP);
                } else {
                        unreachable("bad shader type");
@@ -673,6 +703,8 @@ ir3_emit_consts(const struct ir3_shader_variant *v, struct 
fd_ringbuffer *ring,
 
                emit_user_consts(ctx, v, ring, constbuf);
                emit_ubos(ctx, v, ring, constbuf);
+               if (v->has_tbo)
+                       emit_tbos(ctx, v, ring, textures);
                if (shader_dirty)
                        emit_immediates(ctx, v, ring);
        }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h 
b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index 8c9483e..da8996c 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -50,6 +50,7 @@ enum ir3_driver_param {
  *
  *    num_uniform * vec4  -  user consts
  *    4 * vec4            -  UBO addresses
+ *    4 * vec4            -  TBO lengths
  *    if (vertex shader) {
  *        N * vec4        -  driver params (IR3_DP_*)
  *        1 * vec4        -  stream-out addresses
@@ -59,7 +60,8 @@ enum ir3_driver_param {
  * that we don't need..
  */
 #define IR3_UBOS_OFF         0  /* UBOs after user consts */
-#define IR3_DRIVER_PARAM_OFF 4  /* driver params after UBOs */
+#define IR3_TBOS_OFF         4  /* TBO lengths after UBOs */
+#define IR3_DRIVER_PARAM_OFF 8  /* driver params after TBO lengths */
 #define IR3_TFBOS_OFF       (IR3_DRIVER_PARAM_OFF + IR3_DP_COUNT/4)
 
 /* Configuration key used to identify a shader variant.. different
@@ -213,6 +215,9 @@ struct ir3_shader_variant {
        /* do we have one or more texture sample instructions: */
        bool has_samp;
 
+       /* do we have texture buffer accesses: */
+       bool has_tbo;
+
        /* do we have kill instructions: */
        bool has_kill;
 
-- 
2.7.3

_______________________________________________
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno

Reply via email to